; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; LD1B
;

; <2 x i8> gather with a constant all-true mask: only a ptrue governing
; predicate is needed, with no per-lane mask materialization.
define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    st1b { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
  store <2 x i8> %vals, ptr %a
  ret void
}

; <4 x i8> gather with a constant all-true mask: lowers to a single
; predicated LD1B gather using the ptrue predicate directly.
define void @masked_gather_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  store <4 x i8> %vals, ptr %a
  ret void
}

; <8 x i8> gather with a constant all-true mask. No vscale_range, so the
; 256-bit run splits into two gathers while the 512-bit run uses one.
define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1b { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
; VBITS_GE_256-NEXT:    str d0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT:    str d0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  store <8 x i8> %vals, ptr %a
  ret void
}

; <16 x i8> gather with a constant all-true mask.
define void @masked_gather_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                       i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  store <16 x i8> %vals, ptr %a
  ret void
}

; <32 x i8> gather with a constant all-true mask.
define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                       i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                       i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                       i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
  store <32 x i8> %vals, ptr %a
  ret void
}

;
; LD1H
;

; <2 x i16> gather with a constant all-true mask.
define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
  store <2 x i16> %vals, ptr %a
  ret void
}

; <4 x i16> gather with a constant all-true mask.
define void @masked_gather_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  store <4 x i16> %vals, ptr %a
  ret void
}

; <8 x i16> gather with a constant all-true mask. No vscale_range, so the
; 256-bit run splits into two gathers while the 512-bit run uses one.
define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    str q0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  store <8 x i16> %vals, ptr %a
  ret void
}

; <16 x i16> gather with a constant all-true mask.
define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
  store <16 x i16> %vals, ptr %a
  ret void
}

; <32 x i16> gather with a constant all-true mask.
define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef)
  store <32 x i16> %vals, ptr %a
  ret void
}

;
; LD1W
;

; <2 x i32> gather with a constant all-true mask.
define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
  store <2 x i32> %vals, ptr %a
  ret void
}

; <4 x i32> gather with a constant all-true mask.
define void @masked_gather_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  store <4 x i32> %vals, ptr %a
  ret void
}

; <8 x i32> gather with a constant all-true mask. No vscale_range, so the
; 256-bit run splits into two gathers while the 512-bit run uses one.
define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1w { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  store <8 x i32> %vals, ptr %a
  ret void
}

; <16 x i32> gather with a constant all-true mask.
define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
  store <16 x i32> %vals, ptr %a
  ret void
}

; <32 x i32> gather with a constant all-true mask.
define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef)
  store <32 x i32> %vals, ptr %a
  ret void
}

;
; LD1D
;

; <2 x i64> gather with a constant all-true mask: full-width elements, so no
; narrowing is needed after the LD1D gather.
define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
  store <2 x i64> %vals, ptr %a
  ret void
}

; <4 x i64> gather with a constant all-true mask.
define void @masked_gather_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
  store <4 x i64> %vals, ptr %a
  ret void
}

; <8 x i64> gather with a constant all-true mask. No vscale_range, so the
; 256-bit run splits into two gathers while the 512-bit run uses one.
define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  store <8 x i64> %vals, ptr %a
  ret void
}

; <16 x i64> gather with a constant all-true mask.
define void @masked_gather_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
  store <16 x i64> %vals, ptr %a
  ret void
}

; <32 x i64> gather with a constant all-true mask.
define void @masked_gather_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                          i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i64> undef)
  store <32 x i64> %vals, ptr %a
  ret void
}

declare <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
declare <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
declare <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr>, i32, <32 x i1>, <32 x i8>)

declare <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
declare <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
declare <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
declare <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
declare <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr>, i32, <32 x i1>, <32 x i16>)

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
declare <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
declare <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr>, i32, <32 x i1>, <32 x i32>)

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
declare <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
declare <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr>, i32, <16 x i1>, <16 x i64>)
declare <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr>, i32, <32 x i1>, <32 x i64>)

attributes #0 = { "target-features"="+sve" }
