xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; The 2048-bit run reuses the VBITS_GE_512 prefix: the only functions checked
; under the VBITS_* prefixes operate on 512-bit vectors, so any register width
; of 512 bits or more is expected to produce the same code for them.

target triple = "aarch64-unknown-linux-gnu"
7
;
; DUP (integer)
;
11
; Don't use SVE for 64-bit vectors: the <8 x i8> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <8 x i8> @splat_v8i8(i8 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8b, w0
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}
22
; Don't use SVE for 128-bit vectors: the <16 x i8> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <16 x i8> @splat_v16i8(i8 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, w0
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
33
; 256-bit splat stored through %b. With vscale_range(2,0) the checks expect an
; SVE broadcast of w0 followed by one ST1B predicated on vl32.
define void @splat_v32i8(i8 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %b
  ret void
}
46
; 512-bit splat with no vscale_range, so the expected code depends on the RUN
; line: the 256-bit run stores two vl32 halves (offset 32 and base), while the
; VBITS_GE_512 runs store the whole vector with one vl64 ST1B.
define void @splat_v64i8(i8 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.b, w0
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x1, x8]
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.b, w0
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <64 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
  store <64 x i8> %splat, ptr %b
  ret void
}
68
; 1024-bit splat; vscale_range(8,0) guarantees it fits one SVE register, so the
; checks expect a single broadcast and one vl128 ST1B for every RUN line.
define void @splat_v128i8(i8 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <128 x i8> %insert, <128 x i8> poison, <128 x i32> zeroinitializer
  store <128 x i8> %splat, ptr %b
  ret void
}
81
; 2048-bit splat; vscale_range(16,0) guarantees it fits one SVE register, so
; the checks expect a single broadcast and one vl256 ST1B for every RUN line.
define void @splat_v256i8(i8 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <256 x i8> poison, i8 %a, i64 0
  %splat = shufflevector <256 x i8> %insert, <256 x i8> poison, <256 x i32> zeroinitializer
  store <256 x i8> %splat, ptr %b
  ret void
}
94
; Don't use SVE for 64-bit vectors: the <4 x i16> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <4 x i16> @splat_v4i16(i16 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4h, w0
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
105
; Don't use SVE for 128-bit vectors: the <8 x i16> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <8 x i16> @splat_v8i16(i16 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, w0
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
116
; 256-bit splat stored through %b. With vscale_range(2,0) the checks expect an
; SVE broadcast of w0 followed by one ST1H predicated on vl16.
define void @splat_v16i16(i16 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %b
  ret void
}
129
; 512-bit splat with no vscale_range, so the expected code depends on the RUN
; line: the 256-bit run stores two vl16 halves (element offset 16 and base),
; while the VBITS_GE_512 runs store the whole vector with one vl32 ST1H.
define void @splat_v32i16(i16 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.h, w0
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.h, w0
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <32 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
  store <32 x i16> %splat, ptr %b
  ret void
}
151
; 1024-bit splat; vscale_range(8,0) guarantees it fits one SVE register, so the
; checks expect a single broadcast and one vl64 ST1H for every RUN line.
define void @splat_v64i16(i16 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <64 x i16> %insert, <64 x i16> poison, <64 x i32> zeroinitializer
  store <64 x i16> %splat, ptr %b
  ret void
}
164
; 2048-bit splat; vscale_range(16,0) guarantees it fits one SVE register, so
; the checks expect a single broadcast and one vl128 ST1H for every RUN line.
define void @splat_v128i16(i16 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x i16> poison, i16 %a, i64 0
  %splat = shufflevector <128 x i16> %insert, <128 x i16> poison, <128 x i32> zeroinitializer
  store <128 x i16> %splat, ptr %b
  ret void
}
177
; Don't use SVE for 64-bit vectors: the <2 x i32> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <2 x i32> @splat_v2i32(i32 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2s, w0
; CHECK-NEXT:    ret
  %insert = insertelement <2 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}
188
; Don't use SVE for 128-bit vectors: the <4 x i32> splat is returned in a NEON
; register and the checks expect a single DUP from w0.
define <4 x i32> @splat_v4i32(i32 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, w0
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
199
; 256-bit splat stored through %b. With vscale_range(2,0) the checks expect an
; SVE broadcast of w0 followed by one ST1W predicated on vl8.
define void @splat_v8i32(i32 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %b
  ret void
}
212
; 512-bit splat with no vscale_range, so the expected code depends on the RUN
; line: the 256-bit run stores two vl8 halves (element offset 8 and base),
; while the VBITS_GE_512 runs store the whole vector with one vl16 ST1W.
define void @splat_v16i32(i32 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.s, w0
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.s, w0
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <16 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
  store <16 x i32> %splat, ptr %b
  ret void
}
234
; 1024-bit splat; vscale_range(8,0) guarantees it fits one SVE register, so the
; checks expect a single broadcast and one vl32 ST1W for every RUN line.
define void @splat_v32i32(i32 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <32 x i32> %insert, <32 x i32> poison, <32 x i32> zeroinitializer
  store <32 x i32> %splat, ptr %b
  ret void
}
247
; 2048-bit splat; vscale_range(16,0) guarantees it fits one SVE register, so
; the checks expect a single broadcast and one vl64 ST1W for every RUN line.
define void @splat_v64i32(i32 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i32> poison, i32 %a, i64 0
  %splat = shufflevector <64 x i32> %insert, <64 x i32> poison, <64 x i32> zeroinitializer
  store <64 x i32> %splat, ptr %b
  ret void
}
260
; Don't use SVE for 64-bit vectors: a one-element splat is just a move of the
; scalar into the vector register, so the checks expect a single FMOV from x0.
define <1 x i64> @splat_v1i64(i64 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %insert = insertelement <1 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <1 x i64> %insert, <1 x i64> poison, <1 x i32> zeroinitializer
  ret <1 x i64> %splat
}
271
; Don't use SVE for 128-bit vectors: the <2 x i64> splat is returned in a NEON
; register and the checks expect a single DUP from x0.
define <2 x i64> @splat_v2i64(i64 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, x0
; CHECK-NEXT:    ret
  %insert = insertelement <2 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
282
; 256-bit splat stored through %b. With vscale_range(2,0) the checks expect an
; SVE broadcast of x0 followed by one ST1D predicated on vl4.
define void @splat_v4i64(i64 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %b
  ret void
}
295
; 512-bit splat with no vscale_range, so the expected code depends on the RUN
; line: the 256-bit run stores two vl4 halves (element offset 4 and base),
; while the VBITS_GE_512 runs store the whole vector with one vl8 ST1D.
define void @splat_v8i64(i64 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.d, x0
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.d, x0
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <8 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
  store <8 x i64> %splat, ptr %b
  ret void
}
317
; 1024-bit splat; vscale_range(8,0) guarantees it fits one SVE register, so the
; checks expect a single broadcast and one vl16 ST1D for every RUN line.
define void @splat_v16i64(i64 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <16 x i64> %insert, <16 x i64> poison, <16 x i32> zeroinitializer
  store <16 x i64> %splat, ptr %b
  ret void
}
330
; 2048-bit splat; vscale_range(16,0) guarantees it fits one SVE register, so
; the checks expect a single broadcast and one vl32 ST1D for every RUN line.
define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i64> poison, i64 %a, i64 0
  %splat = shufflevector <32 x i64> %insert, <32 x i64> poison, <32 x i32> zeroinitializer
  store <32 x i64> %splat, ptr %b
  ret void
}
343
;
; DUP (floating-point)
;
347
; Don't use SVE for 64-bit vectors: the half scalar is already in h0, so the
; checks expect a NEON lane DUP from v0.h[0].
define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x half> poison, half %a, i64 0
  %splat = shufflevector <4 x half> %insert, <4 x half> poison, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}
359
; Don't use SVE for 128-bit vectors: the half scalar is already in h0, so the
; checks expect a NEON lane DUP from v0.h[0].
define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x half> poison, half %a, i64 0
  %splat = shufflevector <8 x half> %insert, <8 x half> poison, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}
371
; 256-bit half splat stored through %b. With vscale_range(2,0) the checks
; expect an SVE broadcast of h0 followed by one ST1H predicated on vl16.
define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x half> poison, half %a, i64 0
  %splat = shufflevector <16 x half> %insert, <16 x half> poison, <16 x i32> zeroinitializer
  store <16 x half> %splat, ptr %b
  ret void
}
385
; 512-bit half splat with no vscale_range, so the expected code depends on the
; RUN line: the 256-bit run stores two vl16 halves (element offset 16 and
; base), while the VBITS_GE_512 runs store it with one vl32 ST1H.
define void @splat_v32f16(half %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov z0.h, h0
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    mov z0.h, h0
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <32 x half> poison, half %a, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> poison, <32 x i32> zeroinitializer
  store <32 x half> %splat, ptr %b
  ret void
}
409
; 1024-bit half splat; vscale_range(8,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of h0 and one vl64 ST1H for every RUN line.
define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x half> poison, half %a, i64 0
  %splat = shufflevector <64 x half> %insert, <64 x half> poison, <64 x i32> zeroinitializer
  store <64 x half> %splat, ptr %b
  ret void
}
423
; 2048-bit half splat; vscale_range(16,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of h0 and one vl128 ST1H for every RUN line.
define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x half> poison, half %a, i64 0
  %splat = shufflevector <128 x half> %insert, <128 x half> poison, <128 x i32> zeroinitializer
  store <128 x half> %splat, ptr %b
  ret void
}
437
; Don't use SVE for 64-bit vectors: the float scalar is already in s0, so the
; checks expect a NEON lane DUP from v0.s[0]. %op2 is unused by the body.
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup v0.2s, v0.s[0]
; CHECK-NEXT:    ret
  %insert = insertelement <2 x float> poison, float %a, i64 0
  %splat = shufflevector <2 x float> %insert, <2 x float> poison, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
449
; Don't use SVE for 128-bit vectors: the float scalar is already in s0, so the
; checks expect a NEON lane DUP from v0.s[0]. %op2 is unused by the body.
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x float> poison, float %a, i64 0
  %splat = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
461
; 256-bit float splat stored through %b. With vscale_range(2,0) the checks
; expect an SVE broadcast of s0 followed by one ST1W predicated on vl8.
define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x float> poison, float %a, i64 0
  %splat = shufflevector <8 x float> %insert, <8 x float> poison, <8 x i32> zeroinitializer
  store <8 x float> %splat, ptr %b
  ret void
}
475
; 512-bit float splat with no vscale_range, so the expected code depends on the
; RUN line: the 256-bit run stores two vl8 halves (element offset 8 and base),
; while the VBITS_GE_512 runs store it with one vl16 ST1W.
define void @splat_v16f32(float %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov z0.s, s0
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov z0.s, s0
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <16 x float> poison, float %a, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> poison, <16 x i32> zeroinitializer
  store <16 x float> %splat, ptr %b
  ret void
}
499
; 1024-bit float splat; vscale_range(8,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of s0 and one vl32 ST1W for every RUN line.
define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x float> poison, float %a, i64 0
  %splat = shufflevector <32 x float> %insert, <32 x float> poison, <32 x i32> zeroinitializer
  store <32 x float> %splat, ptr %b
  ret void
}
513
; 2048-bit float splat; vscale_range(16,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of s0 and one vl64 ST1W for every RUN line.
define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x float> poison, float %a, i64 0
  %splat = shufflevector <64 x float> %insert, <64 x float> poison, <64 x i32> zeroinitializer
  store <64 x float> %splat, ptr %b
  ret void
}
527
; Don't use SVE for 64-bit vectors: a one-element double splat needs no work,
; so the checks expect nothing but RET. %op2 is unused by the body.
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %insert = insertelement <1 x double> poison, double %a, i64 0
  %splat = shufflevector <1 x double> %insert, <1 x double> poison, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}
537
; Don't use SVE for 128-bit vectors: the double scalar is already in d0, so the
; checks expect a NEON lane DUP from v0.d[0]. %op2 is unused by the body.
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
  %insert = insertelement <2 x double> poison, double %a, i64 0
  %splat = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
549
; 256-bit double splat stored through %b. With vscale_range(2,0) the checks
; expect an SVE broadcast of d0 followed by one ST1D predicated on vl4.
define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x double> poison, double %a, i64 0
  %splat = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
  store <4 x double> %splat, ptr %b
  ret void
}
563
; 512-bit double splat with no vscale_range, so the expected code depends on
; the RUN line: the 256-bit run stores two vl4 halves (element offset 4 and
; base), while the VBITS_GE_512 runs store it with one vl8 ST1D.
define void @splat_v8f64(double %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    mov z0.d, d0
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    mov z0.d, d0
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <8 x double> poison, double %a, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
  store <8 x double> %splat, ptr %b
  ret void
}
587
; 1024-bit double splat; vscale_range(8,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of d0 and one vl16 ST1D for every RUN line.
define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x double> poison, double %a, i64 0
  %splat = shufflevector <16 x double> %insert, <16 x double> poison, <16 x i32> zeroinitializer
  store <16 x double> %splat, ptr %b
  ret void
}
601
; 2048-bit double splat; vscale_range(16,0) guarantees it fits one SVE register,
; so the checks expect one broadcast of d0 and one vl32 ST1D for every RUN line.
define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x double> poison, double %a, i64 0
  %splat = shufflevector <32 x double> %insert, <32 x double> poison, <32 x i32> zeroinitializer
  store <32 x double> %splat, ptr %b
  ret void
}
615
;
; DUP (integer immediate)
;
619
; Constant splat of i8 1 across 512 bits with vscale_range(4,0): the checks
; expect the immediate form of MOV (no GPR materialisation) and a vl64 ST1B.
define void @splat_imm_v64i8(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, #1 // =0x1
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i8> poison, i8 1, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
  store <64 x i8> %splat, ptr %a
  ret void
}
632
; Constant splat of i16 2 across 512 bits with vscale_range(4,0): the checks
; expect the immediate form of MOV (no GPR materialisation) and a vl32 ST1H.
define void @splat_imm_v32i16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, #2 // =0x2
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i16> poison, i16 2, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
  store <32 x i16> %splat, ptr %a
  ret void
}
645
; Constant splat of i32 3 across 512 bits with vscale_range(4,0): the checks
; expect the immediate form of MOV (no GPR materialisation) and a vl16 ST1W.
define void @splat_imm_v16i32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, #3 // =0x3
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i32> poison, i32 3, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
  store <16 x i32> %splat, ptr %a
  ret void
}
658
; Constant splat of i64 4 across 512 bits with vscale_range(4,0): the checks
; expect the immediate form of MOV (no GPR materialisation) and a vl8 ST1D.
define void @splat_imm_v8i64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, #4 // =0x4
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i64> poison, i64 4, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
  store <8 x i64> %splat, ptr %a
  ret void
}
671
;
; DUP (floating-point immediate)
;
675
; Constant splat of half 5.0 across 512 bits with vscale_range(4,0): the checks
; expect the vector immediate FMOV and a vl32 ST1H.
define void @splat_imm_v32f16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.h, #5.00000000
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x half> poison, half 5.0, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> poison, <32 x i32> zeroinitializer
  store <32 x half> %splat, ptr %a
  ret void
}
688
; Constant splat of float 6.0 across 512 bits with vscale_range(4,0): the
; checks expect the vector immediate FMOV and a vl16 ST1W.
define void @splat_imm_v16f32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.s, #6.00000000
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x float> poison, float 6.0, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> poison, <16 x i32> zeroinitializer
  store <16 x float> %splat, ptr %a
  ret void
}
701
; Constant splat of double 7.0 across 512 bits with vscale_range(4,0): the
; checks expect the vector immediate FMOV and a vl8 ST1D.
define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.d, #7.00000000
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x double> poison, double 7.0, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
  store <8 x double> %splat, ptr %a
  ret void
}
714
; Splat of lane 0 of a loaded <8 x float>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of s0 after the LD1W.
define void @load_splat_v8f32(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %a
  %splat = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> zeroinitializer
  store <8 x float> %splat, ptr %b
  ret void
}
728
; Splat of lane 0 of a loaded <4 x double>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of d0 after the LD1D.
define void @load_splat_v4f64(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %a
  %splat = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> zeroinitializer
  store <4 x double> %splat, ptr %b
  ret void
}
742
; Splat of lane 0 of a loaded <32 x i8>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of b0 after the LD1B.
define void @load_splat_v32i8(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    mov z0.b, b0
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <32 x i8>, ptr %a
  %splat = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %b
  ret void
}
756
; Splat of lane 0 of a loaded <16 x i16>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of h0 after the LD1H.
define void @load_splat_v16i16(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <16 x i16>, ptr %a
  %splat = shufflevector <16 x i16> %v, <16 x i16> poison, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %b
  ret void
}
770
; Splat of lane 0 of a loaded <8 x i32>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of s0 after the LD1W.
define void @load_splat_v8i32(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <8 x i32>, ptr %a
  %splat = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %b
  ret void
}
784
; Splat of lane 0 of a loaded <4 x i64>, stored through %b. vscale_range(2,2)
; pins the register width to exactly 256 bits, so the checks expect an
; all-lanes PTRUE (no vlN pattern) and a broadcast of d0 after the LD1D.
define void @load_splat_v4i64(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <4 x i64>, ptr %a
  %splat = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %b
  ret void
}
798
; Attribute group shared by every function in this file: enables SVE codegen.
attributes #0 = { "target-features"="+sve" }
800