xref: /llvm-project/llvm/test/CodeGen/AArch64/dup.ll (revision 4c8c1308479166d00b4e1d74ceee7cf0abfe6e72)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for dup_v2i8
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v2i8
7; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v2i8
8
; Scalar splat to <2 x i8>: %a is inserted into lane 0 of a poison vector and
; an all-zero shuffle mask broadcasts that lane. Both llc invocations expect a
; single 'dup v0.2s, w0'; the -global-isel invocation is additionally expected
; to warn that it used the fallback path for this function (see top of file).
9define <2 x i8> @dup_v2i8(i8 %a) {
10; CHECK-LABEL: dup_v2i8:
11; CHECK:       // %bb.0: // %entry
12; CHECK-NEXT:    dup v0.2s, w0
13; CHECK-NEXT:    ret
14entry:
15  %b = insertelement <2 x i8> poison, i8 %a, i64 0
16  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
17  ret <2 x i8> %c
18}
19
; Lane-0 splat of the <2 x i8> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.2s, v0.s[0]'; the -global-isel
; invocation is additionally expected to warn that it used the fallback path
; for this function (see top of file).
20define <2 x i8> @duplane0_v2i8(<2 x i8> %b) {
21; CHECK-LABEL: duplane0_v2i8:
22; CHECK:       // %bb.0: // %entry
23; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
24; CHECK-NEXT:    dup v0.2s, v0.s[0]
25; CHECK-NEXT:    ret
26entry:
27  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
28  ret <2 x i8> %c
29}
30
; Load-and-splat to <2 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a scalar 'ldrb' followed by 'dup v0.2s, w8'; the
; -global-isel invocation is additionally expected to warn that it used the
; fallback path for this function (see top of file).
31define <2 x i8> @loaddup_v2i8(ptr %p) {
32; CHECK-LABEL: loaddup_v2i8:
33; CHECK:       // %bb.0: // %entry
34; CHECK-NEXT:    ldrb w8, [x0]
35; CHECK-NEXT:    dup v0.2s, w8
36; CHECK-NEXT:    ret
37entry:
38  %a = load i8, ptr %p
39  %b = insertelement <2 x i8> poison, i8 %a, i64 0
40  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
41  ret <2 x i8> %c
42}
43
; Scalar splat to <3 x i8>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. The default invocation expects only
; two register copies (w1 and w2 from w0); the -global-isel invocation expects
; 'dup v0.8b, w0' followed by three 'umov' lane extracts into w0-w2.
44define <3 x i8> @dup_v3i8(i8 %a) {
45; CHECK-SD-LABEL: dup_v3i8:
46; CHECK-SD:       // %bb.0: // %entry
47; CHECK-SD-NEXT:    mov w1, w0
48; CHECK-SD-NEXT:    mov w2, w0
49; CHECK-SD-NEXT:    ret
50;
51; CHECK-GI-LABEL: dup_v3i8:
52; CHECK-GI:       // %bb.0: // %entry
53; CHECK-GI-NEXT:    dup v0.8b, w0
54; CHECK-GI-NEXT:    umov w0, v0.b[0]
55; CHECK-GI-NEXT:    umov w1, v0.b[1]
56; CHECK-GI-NEXT:    umov w2, v0.b[2]
57; CHECK-GI-NEXT:    ret
58entry:
59  %b = insertelement <3 x i8> poison, i8 %a, i64 0
60  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer
61  ret <3 x i8> %c
62}
63
; Lane-0 splat of the <3 x i8> argument %b (shufflevector with an all-zero
; mask). The default invocation expects two copies of w0 into w1 and w2; the
; -global-isel invocation expects the vector to be rebuilt from w0-w2 (fmov
; plus two lane inserts), a byte-lane 'dup v0.8b, v0.b[0]', and three 'umov'
; extracts back into w0-w2.
64define <3 x i8> @duplane0_v3i8(<3 x i8> %b) {
65; CHECK-SD-LABEL: duplane0_v3i8:
66; CHECK-SD:       // %bb.0: // %entry
67; CHECK-SD-NEXT:    mov w1, w0
68; CHECK-SD-NEXT:    mov w2, w0
69; CHECK-SD-NEXT:    ret
70;
71; CHECK-GI-LABEL: duplane0_v3i8:
72; CHECK-GI:       // %bb.0: // %entry
73; CHECK-GI-NEXT:    fmov s0, w0
74; CHECK-GI-NEXT:    mov v0.b[1], w1
75; CHECK-GI-NEXT:    mov v0.b[2], w2
76; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
77; CHECK-GI-NEXT:    umov w0, v0.b[0]
78; CHECK-GI-NEXT:    umov w1, v0.b[1]
79; CHECK-GI-NEXT:    umov w2, v0.b[2]
80; CHECK-GI-NEXT:    ret
81entry:
82  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer
83  ret <3 x i8> %c
84}
85
; Load-and-splat to <3 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldrb w0' plus two register copies; the -global-isel
; invocation expects 'ld1r { v0.8b }' followed by three 'umov' lane extracts.
86define <3 x i8> @loaddup_v3i8(ptr %p) {
87; CHECK-SD-LABEL: loaddup_v3i8:
88; CHECK-SD:       // %bb.0: // %entry
89; CHECK-SD-NEXT:    ldrb w0, [x0]
90; CHECK-SD-NEXT:    mov w1, w0
91; CHECK-SD-NEXT:    mov w2, w0
92; CHECK-SD-NEXT:    ret
93;
94; CHECK-GI-LABEL: loaddup_v3i8:
95; CHECK-GI:       // %bb.0: // %entry
96; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
97; CHECK-GI-NEXT:    umov w0, v0.b[0]
98; CHECK-GI-NEXT:    umov w1, v0.b[1]
99; CHECK-GI-NEXT:    umov w2, v0.b[2]
100; CHECK-GI-NEXT:    ret
101entry:
102  %a = load i8, ptr %p
103  %b = insertelement <3 x i8> poison, i8 %a, i64 0
104  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer
105  ret <3 x i8> %c
106}
107
; Scalar splat to <4 x i8>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. The default invocation expects a
; single 'dup v0.4h, w0'; the -global-isel invocation expects 'dup v0.8b, w0'
; followed by a 'ushll' to .8h.
108define <4 x i8> @dup_v4i8(i8 %a) {
109; CHECK-SD-LABEL: dup_v4i8:
110; CHECK-SD:       // %bb.0: // %entry
111; CHECK-SD-NEXT:    dup v0.4h, w0
112; CHECK-SD-NEXT:    ret
113;
114; CHECK-GI-LABEL: dup_v4i8:
115; CHECK-GI:       // %bb.0: // %entry
116; CHECK-GI-NEXT:    dup v0.8b, w0
117; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
118; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
119; CHECK-GI-NEXT:    ret
120entry:
121  %b = insertelement <4 x i8> poison, i8 %a, i64 0
122  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
123  ret <4 x i8> %c
124}
125
; Lane-0 splat of the <4 x i8> argument %b (shufflevector with an all-zero
; mask). The default invocation expects 'dup v0.4h, v0.h[0]'; the -global-isel
; invocation expects a 'uzp1' to .8b, a byte-lane 'dup', and a 'ushll' back to
; .8h.
126define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
127; CHECK-SD-LABEL: duplane0_v4i8:
128; CHECK-SD:       // %bb.0: // %entry
129; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
130; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
131; CHECK-SD-NEXT:    ret
132;
133; CHECK-GI-LABEL: duplane0_v4i8:
134; CHECK-GI:       // %bb.0: // %entry
135; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
136; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
137; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
138; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
139; CHECK-GI-NEXT:    ret
140entry:
141  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
142  ret <4 x i8> %c
143}
144
; Load-and-splat to <4 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldrb' plus 'dup v0.4h, w8'; the -global-isel invocation
; expects 'ld1r { v0.8b }' followed by a 'ushll' to .8h.
145define <4 x i8> @loaddup_v4i8(ptr %p) {
146; CHECK-SD-LABEL: loaddup_v4i8:
147; CHECK-SD:       // %bb.0: // %entry
148; CHECK-SD-NEXT:    ldrb w8, [x0]
149; CHECK-SD-NEXT:    dup v0.4h, w8
150; CHECK-SD-NEXT:    ret
151;
152; CHECK-GI-LABEL: loaddup_v4i8:
153; CHECK-GI:       // %bb.0: // %entry
154; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
155; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
156; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
157; CHECK-GI-NEXT:    ret
158entry:
159  %a = load i8, ptr %p
160  %b = insertelement <4 x i8> poison, i8 %a, i64 0
161  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
162  ret <4 x i8> %c
163}
164
; Scalar splat to <8 x i8>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.8b, w0'.
165define <8 x i8> @dup_v8i8(i8 %a) {
166; CHECK-LABEL: dup_v8i8:
167; CHECK:       // %bb.0: // %entry
168; CHECK-NEXT:    dup v0.8b, w0
169; CHECK-NEXT:    ret
170entry:
171  %b = insertelement <8 x i8> poison, i8 %a, i64 0
172  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
173  ret <8 x i8> %c
174}
175
; Lane-0 splat of the <8 x i8> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.8b, v0.b[0]'.
176define <8 x i8> @duplane0_v8i8(<8 x i8> %b) {
177; CHECK-LABEL: duplane0_v8i8:
178; CHECK:       // %bb.0: // %entry
179; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
180; CHECK-NEXT:    dup v0.8b, v0.b[0]
181; CHECK-NEXT:    ret
182entry:
183  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
184  ret <8 x i8> %c
185}
186
; Load-and-splat to <8 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.8b }'.
187define <8 x i8> @loaddup_v8i8(ptr %p) {
188; CHECK-LABEL: loaddup_v8i8:
189; CHECK:       // %bb.0: // %entry
190; CHECK-NEXT:    ld1r { v0.8b }, [x0]
191; CHECK-NEXT:    ret
192entry:
193  %a = load i8, ptr %p
194  %b = insertelement <8 x i8> poison, i8 %a, i64 0
195  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
196  ret <8 x i8> %c
197}
198
; Scalar splat to <16 x i8>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.16b, w0'.
199define <16 x i8> @dup_v16i8(i8 %a) {
200; CHECK-LABEL: dup_v16i8:
201; CHECK:       // %bb.0: // %entry
202; CHECK-NEXT:    dup v0.16b, w0
203; CHECK-NEXT:    ret
204entry:
205  %b = insertelement <16 x i8> poison, i8 %a, i64 0
206  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
207  ret <16 x i8> %c
208}
209
; Lane-0 splat of the <16 x i8> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.16b, v0.b[0]'.
210define <16 x i8> @duplane0_v16i8(<16 x i8> %b) {
211; CHECK-LABEL: duplane0_v16i8:
212; CHECK:       // %bb.0: // %entry
213; CHECK-NEXT:    dup v0.16b, v0.b[0]
214; CHECK-NEXT:    ret
215entry:
216  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
217  ret <16 x i8> %c
218}
219
; Load-and-splat to <16 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.16b }'.
220define <16 x i8> @loaddup_v16i8(ptr %p) {
221; CHECK-LABEL: loaddup_v16i8:
222; CHECK:       // %bb.0: // %entry
223; CHECK-NEXT:    ld1r { v0.16b }, [x0]
224; CHECK-NEXT:    ret
225entry:
226  %a = load i8, ptr %p
227  %b = insertelement <16 x i8> poison, i8 %a, i64 0
228  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
229  ret <16 x i8> %c
230}
231
; Scalar splat to <32 x i8>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect
; 'dup v0.16b, w0' followed by a copy of v0 into v1.
232define <32 x i8> @dup_v32i8(i8 %a) {
233; CHECK-LABEL: dup_v32i8:
234; CHECK:       // %bb.0: // %entry
235; CHECK-NEXT:    dup v0.16b, w0
236; CHECK-NEXT:    mov v1.16b, v0.16b
237; CHECK-NEXT:    ret
238entry:
239  %b = insertelement <32 x i8> poison, i8 %a, i64 0
240  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
241  ret <32 x i8> %c
242}
243
; Lane-0 splat of the <32 x i8> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.16b, v0.b[0]' followed by a copy
; of v0 into v1.
244define <32 x i8> @duplane0_v32i8(<32 x i8> %b) {
245; CHECK-LABEL: duplane0_v32i8:
246; CHECK:       // %bb.0: // %entry
247; CHECK-NEXT:    dup v0.16b, v0.b[0]
248; CHECK-NEXT:    mov v1.16b, v0.16b
249; CHECK-NEXT:    ret
250entry:
251  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
252  ret <32 x i8> %c
253}
254
; Load-and-splat to <32 x i8>: an i8 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects one 'ld1r { v0.16b }' plus a copy of v0 into v1; the
; -global-isel invocation expects two 'ld1r' loads from the same address.
255define <32 x i8> @loaddup_v32i8(ptr %p) {
256; CHECK-SD-LABEL: loaddup_v32i8:
257; CHECK-SD:       // %bb.0: // %entry
258; CHECK-SD-NEXT:    ld1r { v0.16b }, [x0]
259; CHECK-SD-NEXT:    mov v1.16b, v0.16b
260; CHECK-SD-NEXT:    ret
261;
262; CHECK-GI-LABEL: loaddup_v32i8:
263; CHECK-GI:       // %bb.0: // %entry
264; CHECK-GI-NEXT:    ld1r { v0.16b }, [x0]
265; CHECK-GI-NEXT:    ld1r { v1.16b }, [x0]
266; CHECK-GI-NEXT:    ret
267entry:
268  %a = load i8, ptr %p
269  %b = insertelement <32 x i8> poison, i8 %a, i64 0
270  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
271  ret <32 x i8> %c
272}
273
; Scalar splat to <2 x i16>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. The default invocation expects a
; single 'dup v0.2s, w0'; the -global-isel invocation expects 'dup v0.4h, w0'
; followed by a 'ushll' to .4s.
274define <2 x i16> @dup_v2i16(i16 %a) {
275; CHECK-SD-LABEL: dup_v2i16:
276; CHECK-SD:       // %bb.0: // %entry
277; CHECK-SD-NEXT:    dup v0.2s, w0
278; CHECK-SD-NEXT:    ret
279;
280; CHECK-GI-LABEL: dup_v2i16:
281; CHECK-GI:       // %bb.0: // %entry
282; CHECK-GI-NEXT:    dup v0.4h, w0
283; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
284; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
285; CHECK-GI-NEXT:    ret
286entry:
287  %b = insertelement <2 x i16> poison, i16 %a, i64 0
288  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
289  ret <2 x i16> %c
290}
291
; Lane-0 splat of the <2 x i16> argument %b (shufflevector with an all-zero
; mask). The default invocation expects 'dup v0.2s, v0.s[0]'; the -global-isel
; invocation expects a 'uzp1' to .4h, a halfword-lane 'dup', and a 'ushll'
; back to .4s.
292define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
293; CHECK-SD-LABEL: duplane0_v2i16:
294; CHECK-SD:       // %bb.0: // %entry
295; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
296; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
297; CHECK-SD-NEXT:    ret
298;
299; CHECK-GI-LABEL: duplane0_v2i16:
300; CHECK-GI:       // %bb.0: // %entry
301; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
302; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
303; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
304; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
305; CHECK-GI-NEXT:    ret
306entry:
307  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
308  ret <2 x i16> %c
309}
310
; Load-and-splat to <2 x i16>: an i16 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldrh' plus 'dup v0.2s, w8'; the -global-isel invocation
; expects 'ld1r { v0.4h }' followed by a 'ushll' to .4s.
311define <2 x i16> @loaddup_v2i16(ptr %p) {
312; CHECK-SD-LABEL: loaddup_v2i16:
313; CHECK-SD:       // %bb.0: // %entry
314; CHECK-SD-NEXT:    ldrh w8, [x0]
315; CHECK-SD-NEXT:    dup v0.2s, w8
316; CHECK-SD-NEXT:    ret
317;
318; CHECK-GI-LABEL: loaddup_v2i16:
319; CHECK-GI:       // %bb.0: // %entry
320; CHECK-GI-NEXT:    ld1r { v0.4h }, [x0]
321; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
322; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
323; CHECK-GI-NEXT:    ret
324entry:
325  %a = load i16, ptr %p
326  %b = insertelement <2 x i16> poison, i16 %a, i64 0
327  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
328  ret <2 x i16> %c
329}
330
; Scalar splat to <3 x i16>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.4h, w0'.
331define <3 x i16> @dup_v3i16(i16 %a) {
332; CHECK-LABEL: dup_v3i16:
333; CHECK:       // %bb.0: // %entry
334; CHECK-NEXT:    dup v0.4h, w0
335; CHECK-NEXT:    ret
336entry:
337  %b = insertelement <3 x i16> poison, i16 %a, i64 0
338  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer
339  ret <3 x i16> %c
340}
341
; Lane-0 splat of the <3 x i16> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4h, v0.h[0]'.
342define <3 x i16> @duplane0_v3i16(<3 x i16> %b) {
343; CHECK-LABEL: duplane0_v3i16:
344; CHECK:       // %bb.0: // %entry
345; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
346; CHECK-NEXT:    dup v0.4h, v0.h[0]
347; CHECK-NEXT:    ret
348entry:
349  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer
350  ret <3 x i16> %c
351}
352
; Load-and-splat to <3 x i16>: an i16 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.4h }'.
353define <3 x i16> @loaddup_v3i16(ptr %p) {
354; CHECK-LABEL: loaddup_v3i16:
355; CHECK:       // %bb.0: // %entry
356; CHECK-NEXT:    ld1r { v0.4h }, [x0]
357; CHECK-NEXT:    ret
358entry:
359  %a = load i16, ptr %p
360  %b = insertelement <3 x i16> poison, i16 %a, i64 0
361  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer
362  ret <3 x i16> %c
363}
364
; Scalar splat to <4 x i16>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.4h, w0'.
365define <4 x i16> @dup_v4i16(i16 %a) {
366; CHECK-LABEL: dup_v4i16:
367; CHECK:       // %bb.0: // %entry
368; CHECK-NEXT:    dup v0.4h, w0
369; CHECK-NEXT:    ret
370entry:
371  %b = insertelement <4 x i16> poison, i16 %a, i64 0
372  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
373  ret <4 x i16> %c
374}
375
; Lane-0 splat of the <4 x i16> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4h, v0.h[0]'.
376define <4 x i16> @duplane0_v4i16(<4 x i16> %b) {
377; CHECK-LABEL: duplane0_v4i16:
378; CHECK:       // %bb.0: // %entry
379; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
380; CHECK-NEXT:    dup v0.4h, v0.h[0]
381; CHECK-NEXT:    ret
382entry:
383  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
384  ret <4 x i16> %c
385}
386
; Load-and-splat to <4 x i16>: an i16 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.4h }'.
387define <4 x i16> @loaddup_v4i16(ptr %p) {
388; CHECK-LABEL: loaddup_v4i16:
389; CHECK:       // %bb.0: // %entry
390; CHECK-NEXT:    ld1r { v0.4h }, [x0]
391; CHECK-NEXT:    ret
392entry:
393  %a = load i16, ptr %p
394  %b = insertelement <4 x i16> poison, i16 %a, i64 0
395  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
396  ret <4 x i16> %c
397}
398
; Scalar splat to <8 x i16>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.8h, w0'.
399define <8 x i16> @dup_v8i16(i16 %a) {
400; CHECK-LABEL: dup_v8i16:
401; CHECK:       // %bb.0: // %entry
402; CHECK-NEXT:    dup v0.8h, w0
403; CHECK-NEXT:    ret
404entry:
405  %b = insertelement <8 x i16> poison, i16 %a, i64 0
406  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
407  ret <8 x i16> %c
408}
409
; Lane-0 splat of the <8 x i16> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.8h, v0.h[0]'.
410define <8 x i16> @duplane0_v8i16(<8 x i16> %b) {
411; CHECK-LABEL: duplane0_v8i16:
412; CHECK:       // %bb.0: // %entry
413; CHECK-NEXT:    dup v0.8h, v0.h[0]
414; CHECK-NEXT:    ret
415entry:
416  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
417  ret <8 x i16> %c
418}
419
; Load-and-splat to <8 x i16>: an i16 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.8h }'.
420define <8 x i16> @loaddup_v8i16(ptr %p) {
421; CHECK-LABEL: loaddup_v8i16:
422; CHECK:       // %bb.0: // %entry
423; CHECK-NEXT:    ld1r { v0.8h }, [x0]
424; CHECK-NEXT:    ret
425entry:
426  %a = load i16, ptr %p
427  %b = insertelement <8 x i16> poison, i16 %a, i64 0
428  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
429  ret <8 x i16> %c
430}
431
; Scalar splat to <16 x i16>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect
; 'dup v0.8h, w0' followed by a copy of v0 into v1.
432define <16 x i16> @dup_v16i16(i16 %a) {
433; CHECK-LABEL: dup_v16i16:
434; CHECK:       // %bb.0: // %entry
435; CHECK-NEXT:    dup v0.8h, w0
436; CHECK-NEXT:    mov v1.16b, v0.16b
437; CHECK-NEXT:    ret
438entry:
439  %b = insertelement <16 x i16> poison, i16 %a, i64 0
440  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
441  ret <16 x i16> %c
442}
443
; Lane-0 splat of the <16 x i16> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.8h, v0.h[0]' followed by a copy
; of v0 into v1.
444define <16 x i16> @duplane0_v16i16(<16 x i16> %b) {
445; CHECK-LABEL: duplane0_v16i16:
446; CHECK:       // %bb.0: // %entry
447; CHECK-NEXT:    dup v0.8h, v0.h[0]
448; CHECK-NEXT:    mov v1.16b, v0.16b
449; CHECK-NEXT:    ret
450entry:
451  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
452  ret <16 x i16> %c
453}
454
; Load-and-splat to <16 x i16>: an i16 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects one 'ld1r { v0.8h }' plus a copy of v0 into v1; the
; -global-isel invocation expects two 'ld1r' loads from the same address.
455define <16 x i16> @loaddup_v16i16(ptr %p) {
456; CHECK-SD-LABEL: loaddup_v16i16:
457; CHECK-SD:       // %bb.0: // %entry
458; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
459; CHECK-SD-NEXT:    mov v1.16b, v0.16b
460; CHECK-SD-NEXT:    ret
461;
462; CHECK-GI-LABEL: loaddup_v16i16:
463; CHECK-GI:       // %bb.0: // %entry
464; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
465; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
466; CHECK-GI-NEXT:    ret
467entry:
468  %a = load i16, ptr %p
469  %b = insertelement <16 x i16> poison, i16 %a, i64 0
470  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
471  ret <16 x i16> %c
472}
473
; Scalar splat to <2 x i32>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.2s, w0'.
474define <2 x i32> @dup_v2i32(i32 %a) {
475; CHECK-LABEL: dup_v2i32:
476; CHECK:       // %bb.0: // %entry
477; CHECK-NEXT:    dup v0.2s, w0
478; CHECK-NEXT:    ret
479entry:
480  %b = insertelement <2 x i32> poison, i32 %a, i64 0
481  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
482  ret <2 x i32> %c
483}
484
; Lane-0 splat of the <2 x i32> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.2s, v0.s[0]'.
485define <2 x i32> @duplane0_v2i32(<2 x i32> %b) {
486; CHECK-LABEL: duplane0_v2i32:
487; CHECK:       // %bb.0: // %entry
488; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
489; CHECK-NEXT:    dup v0.2s, v0.s[0]
490; CHECK-NEXT:    ret
491entry:
492  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
493  ret <2 x i32> %c
494}
495
; Load-and-splat to <2 x i32>: an i32 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.2s }'.
496define <2 x i32> @loaddup_v2i32(ptr %p) {
497; CHECK-LABEL: loaddup_v2i32:
498; CHECK:       // %bb.0: // %entry
499; CHECK-NEXT:    ld1r { v0.2s }, [x0]
500; CHECK-NEXT:    ret
501entry:
502  %a = load i32, ptr %p
503  %b = insertelement <2 x i32> poison, i32 %a, i64 0
504  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
505  ret <2 x i32> %c
506}
507
; Scalar splat to <3 x i32>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.4s, w0'.
508define <3 x i32> @dup_v3i32(i32 %a) {
509; CHECK-LABEL: dup_v3i32:
510; CHECK:       // %bb.0: // %entry
511; CHECK-NEXT:    dup v0.4s, w0
512; CHECK-NEXT:    ret
513entry:
514  %b = insertelement <3 x i32> poison, i32 %a, i64 0
515  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer
516  ret <3 x i32> %c
517}
518
; Lane-0 splat of the <3 x i32> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4s, v0.s[0]'.
519define <3 x i32> @duplane0_v3i32(<3 x i32> %b) {
520; CHECK-LABEL: duplane0_v3i32:
521; CHECK:       // %bb.0: // %entry
522; CHECK-NEXT:    dup v0.4s, v0.s[0]
523; CHECK-NEXT:    ret
524entry:
525  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer
526  ret <3 x i32> %c
527}
528
; Load-and-splat to <3 x i32>: an i32 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.4s }'.
529define <3 x i32> @loaddup_v3i32(ptr %p) {
530; CHECK-LABEL: loaddup_v3i32:
531; CHECK:       // %bb.0: // %entry
532; CHECK-NEXT:    ld1r { v0.4s }, [x0]
533; CHECK-NEXT:    ret
534entry:
535  %a = load i32, ptr %p
536  %b = insertelement <3 x i32> poison, i32 %a, i64 0
537  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer
538  ret <3 x i32> %c
539}
540
; Scalar splat to <4 x i32>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.4s, w0'.
541define <4 x i32> @dup_v4i32(i32 %a) {
542; CHECK-LABEL: dup_v4i32:
543; CHECK:       // %bb.0: // %entry
544; CHECK-NEXT:    dup v0.4s, w0
545; CHECK-NEXT:    ret
546entry:
547  %b = insertelement <4 x i32> poison, i32 %a, i64 0
548  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
549  ret <4 x i32> %c
550}
551
; Lane-0 splat of the <4 x i32> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4s, v0.s[0]'.
552define <4 x i32> @duplane0_v4i32(<4 x i32> %b) {
553; CHECK-LABEL: duplane0_v4i32:
554; CHECK:       // %bb.0: // %entry
555; CHECK-NEXT:    dup v0.4s, v0.s[0]
556; CHECK-NEXT:    ret
557entry:
558  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
559  ret <4 x i32> %c
560}
561
; Load-and-splat to <4 x i32>: an i32 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.4s }'.
562define <4 x i32> @loaddup_v4i32(ptr %p) {
563; CHECK-LABEL: loaddup_v4i32:
564; CHECK:       // %bb.0: // %entry
565; CHECK-NEXT:    ld1r { v0.4s }, [x0]
566; CHECK-NEXT:    ret
567entry:
568  %a = load i32, ptr %p
569  %b = insertelement <4 x i32> poison, i32 %a, i64 0
570  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
571  ret <4 x i32> %c
572}
573
; Scalar splat to <8 x i32>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect
; 'dup v0.4s, w0' followed by a copy of v0 into v1.
574define <8 x i32> @dup_v8i32(i32 %a) {
575; CHECK-LABEL: dup_v8i32:
576; CHECK:       // %bb.0: // %entry
577; CHECK-NEXT:    dup v0.4s, w0
578; CHECK-NEXT:    mov v1.16b, v0.16b
579; CHECK-NEXT:    ret
580entry:
581  %b = insertelement <8 x i32> poison, i32 %a, i64 0
582  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
583  ret <8 x i32> %c
584}
585
; Lane-0 splat of the <8 x i32> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4s, v0.s[0]' followed by a copy
; of v0 into v1.
585define <8 x i32> @duplane0_v8i32(<8 x i32> %b) {
586; CHECK-LABEL: duplane0_v8i32:
587; CHECK:       // %bb.0: // %entry
588; CHECK-NEXT:    dup v0.4s, v0.s[0]
589; CHECK-NEXT:    mov v1.16b, v0.16b
590; CHECK-NEXT:    ret
591entry:
592  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
593  ret <8 x i32> %c
594}
596
; Load-and-splat to <8 x i32>: an i32 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects one 'ld1r { v0.4s }' plus a copy of v0 into v1; the
; -global-isel invocation expects two 'ld1r' loads from the same address.
596define <8 x i32> @loaddup_v8i32(ptr %p) {
597; CHECK-SD-LABEL: loaddup_v8i32:
598; CHECK-SD:       // %bb.0: // %entry
599; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
600; CHECK-SD-NEXT:    mov v1.16b, v0.16b
601; CHECK-SD-NEXT:    ret
602;
603; CHECK-GI-LABEL: loaddup_v8i32:
604; CHECK-GI:       // %bb.0: // %entry
605; CHECK-GI-NEXT:    ld1r { v0.4s }, [x0]
606; CHECK-GI-NEXT:    ld1r { v1.4s }, [x0]
607; CHECK-GI-NEXT:    ret
608entry:
609  %a = load i32, ptr %p
610  %b = insertelement <8 x i32> poison, i32 %a, i64 0
611  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
612  ret <8 x i32> %c
613}
615
; Scalar splat to <2 x i64>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a single
; 'dup v0.2d, x0'.
615define <2 x i64> @dup_v2i64(i64 %a) {
616; CHECK-LABEL: dup_v2i64:
617; CHECK:       // %bb.0: // %entry
618; CHECK-NEXT:    dup v0.2d, x0
619; CHECK-NEXT:    ret
620entry:
621  %b = insertelement <2 x i64> poison, i64 %a, i64 0
622  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
623  ret <2 x i64> %c
624}
626
; Lane-0 splat of the <2 x i64> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.2d, v0.d[0]'.
626define <2 x i64> @duplane0_v2i64(<2 x i64> %b) {
627; CHECK-LABEL: duplane0_v2i64:
628; CHECK:       // %bb.0: // %entry
629; CHECK-NEXT:    dup v0.2d, v0.d[0]
630; CHECK-NEXT:    ret
631entry:
632  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
633  ret <2 x i64> %c
634}
636
; Load-and-splat to <2 x i64>: an i64 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.2d }'.
636define <2 x i64> @loaddup_v2i64(ptr %p) {
637; CHECK-LABEL: loaddup_v2i64:
638; CHECK:       // %bb.0: // %entry
639; CHECK-NEXT:    ld1r { v0.2d }, [x0]
640; CHECK-NEXT:    ret
641entry:
642  %a = load i64, ptr %p
643  %b = insertelement <2 x i64> poison, i64 %a, i64 0
644  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
645  ret <2 x i64> %c
646}
648
; Scalar splat to <3 x i64>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. The default invocation expects
; 'fmov d0, x0' followed by copies of d0 into d1 and d2; the -global-isel
; invocation expects 'dup v0.2d, x0', d1 taken from lane 1 of v0, and d2
; copied from d0.
648define <3 x i64> @dup_v3i64(i64 %a) {
649; CHECK-SD-LABEL: dup_v3i64:
650; CHECK-SD:       // %bb.0: // %entry
651; CHECK-SD-NEXT:    fmov d0, x0
652; CHECK-SD-NEXT:    fmov d1, d0
653; CHECK-SD-NEXT:    fmov d2, d0
654; CHECK-SD-NEXT:    ret
655;
656; CHECK-GI-LABEL: dup_v3i64:
657; CHECK-GI:       // %bb.0: // %entry
658; CHECK-GI-NEXT:    dup v0.2d, x0
659; CHECK-GI-NEXT:    mov d1, v0.d[1]
660; CHECK-GI-NEXT:    fmov d2, d0
661; CHECK-GI-NEXT:    ret
662entry:
663  %b = insertelement <3 x i64> poison, i64 %a, i64 0
664  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
665  ret <3 x i64> %c
666}
668
; Lane-0 splat of the <3 x i64> argument %b (shufflevector with an all-zero
; mask). The default invocation expects only copies of d0 into d1 and d2; the
; -global-isel invocation expects a two-lane vector to be assembled in v2 from
; d0 and d1, 'dup v0.2d, v2.d[0]', and d1 then taken from lane 1 of v0.
668define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
669; CHECK-SD-LABEL: duplane0_v3i64:
670; CHECK-SD:       // %bb.0: // %entry
671; CHECK-SD-NEXT:    fmov d1, d0
672; CHECK-SD-NEXT:    fmov d2, d0
673; CHECK-SD-NEXT:    ret
674;
675; CHECK-GI-LABEL: duplane0_v3i64:
676; CHECK-GI:       // %bb.0: // %entry
677; CHECK-GI-NEXT:    fmov d2, d0
678; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
679; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
680; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
681; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
682; CHECK-GI-NEXT:    mov d1, v0.d[1]
683; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
684; CHECK-GI-NEXT:    ret
685entry:
686  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
687  ret <3 x i64> %c
688}
690
; Load-and-splat to <3 x i64>: an i64 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldr d0' plus copies of d0 into d1 and d2; the
; -global-isel invocation expects two 'ld1r' loads (into v0 and v2) from the
; same address, with d1 taken from lane 1 of v0.
690define <3 x i64> @loaddup_v3i64(ptr %p) {
691; CHECK-SD-LABEL: loaddup_v3i64:
692; CHECK-SD:       // %bb.0: // %entry
693; CHECK-SD-NEXT:    ldr d0, [x0]
694; CHECK-SD-NEXT:    fmov d1, d0
695; CHECK-SD-NEXT:    fmov d2, d0
696; CHECK-SD-NEXT:    ret
697;
698; CHECK-GI-LABEL: loaddup_v3i64:
699; CHECK-GI:       // %bb.0: // %entry
700; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
701; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
702; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
703; CHECK-GI-NEXT:    mov d1, v0.d[1]
704; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
705; CHECK-GI-NEXT:    ret
706entry:
707  %a = load i64, ptr %p
708  %b = insertelement <3 x i64> poison, i64 %a, i64 0
709  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
710  ret <3 x i64> %c
711}
713
; Scalar splat to <4 x i64>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect
; 'dup v0.2d, x0' followed by a copy of v0 into v1.
713define <4 x i64> @dup_v4i64(i64 %a) {
714; CHECK-LABEL: dup_v4i64:
715; CHECK:       // %bb.0: // %entry
716; CHECK-NEXT:    dup v0.2d, x0
717; CHECK-NEXT:    mov v1.16b, v0.16b
718; CHECK-NEXT:    ret
719entry:
720  %b = insertelement <4 x i64> poison, i64 %a, i64 0
721  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer
722  ret <4 x i64> %c
723}
725
; Lane-0 splat of the <4 x i64> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.2d, v0.d[0]' followed by a copy
; of v0 into v1.
725define <4 x i64> @duplane0_v4i64(<4 x i64> %b) {
726; CHECK-LABEL: duplane0_v4i64:
727; CHECK:       // %bb.0: // %entry
728; CHECK-NEXT:    dup v0.2d, v0.d[0]
729; CHECK-NEXT:    mov v1.16b, v0.16b
730; CHECK-NEXT:    ret
731entry:
732  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer
733  ret <4 x i64> %c
734}
736
; Load-and-splat to <4 x i64>: an i64 is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects one 'ld1r { v0.2d }' plus a copy of v0 into v1; the
; -global-isel invocation expects two 'ld1r' loads from the same address.
736define <4 x i64> @loaddup_v4i64(ptr %p) {
737; CHECK-SD-LABEL: loaddup_v4i64:
738; CHECK-SD:       // %bb.0: // %entry
739; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
740; CHECK-SD-NEXT:    mov v1.16b, v0.16b
741; CHECK-SD-NEXT:    ret
742;
743; CHECK-GI-LABEL: loaddup_v4i64:
744; CHECK-GI:       // %bb.0: // %entry
745; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
746; CHECK-GI-NEXT:    ld1r { v1.2d }, [x0]
747; CHECK-GI-NEXT:    ret
748entry:
749  %a = load i64, ptr %p
750  %b = insertelement <4 x i64> poison, i64 %a, i64 0
751  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer
752  ret <4 x i64> %c
753}
755
; Scalar splat to <2 x i128>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect only
; general-purpose register copies: x2 from x0 and x3 from x1.
755define <2 x i128> @dup_v2i128(i128 %a) {
756; CHECK-LABEL: dup_v2i128:
757; CHECK:       // %bb.0: // %entry
758; CHECK-NEXT:    mov x2, x0
759; CHECK-NEXT:    mov x3, x1
760; CHECK-NEXT:    ret
761entry:
762  %b = insertelement <2 x i128> poison, i128 %a, i64 0
763  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer
764  ret <2 x i128> %c
765}
767
; Lane-0 splat of the <2 x i128> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect only general-purpose register copies:
; x2 from x0 and x3 from x1.
767define <2 x i128> @duplane0_v2i128(<2 x i128> %b) {
768; CHECK-LABEL: duplane0_v2i128:
769; CHECK:       // %bb.0: // %entry
770; CHECK-NEXT:    mov x2, x0
771; CHECK-NEXT:    mov x3, x1
772; CHECK-NEXT:    ret
773entry:
774  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer
775  ret <2 x i128> %c
776}
778
; Load-and-splat to <2 x i128>: an i128 is loaded from %p, placed in lane 0 of
; a poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldp x2, x1' plus copies into x0 and x3; the -global-isel
; invocation expects 'ldr q0', the upper half moved to d1, and 'fmov' copies
; of d0 into x0/x2 and of d1 into x1/x3.
778define <2 x i128> @loaddup_v2i128(ptr %p) {
779; CHECK-SD-LABEL: loaddup_v2i128:
780; CHECK-SD:       // %bb.0: // %entry
781; CHECK-SD-NEXT:    ldp x2, x1, [x0]
782; CHECK-SD-NEXT:    mov x0, x2
783; CHECK-SD-NEXT:    mov x3, x1
784; CHECK-SD-NEXT:    ret
785;
786; CHECK-GI-LABEL: loaddup_v2i128:
787; CHECK-GI:       // %bb.0: // %entry
788; CHECK-GI-NEXT:    ldr q0, [x0]
789; CHECK-GI-NEXT:    mov d1, v0.d[1]
790; CHECK-GI-NEXT:    fmov x0, d0
791; CHECK-GI-NEXT:    fmov x2, d0
792; CHECK-GI-NEXT:    fmov x1, d1
793; CHECK-GI-NEXT:    fmov x3, d1
794; CHECK-GI-NEXT:    ret
795entry:
796  %a = load i128, ptr %p
797  %b = insertelement <2 x i128> poison, i128 %a, i64 0
798  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer
799  ret <2 x i128> %c
800}
802
; Scalar splat to <3 x i128>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect only
; general-purpose register copies: x2/x4 from x0 and x3/x5 from x1.
802define <3 x i128> @dup_v3i128(i128 %a) {
803; CHECK-LABEL: dup_v3i128:
804; CHECK:       // %bb.0: // %entry
805; CHECK-NEXT:    mov x2, x0
806; CHECK-NEXT:    mov x3, x1
807; CHECK-NEXT:    mov x4, x0
808; CHECK-NEXT:    mov x5, x1
809; CHECK-NEXT:    ret
810entry:
811  %b = insertelement <3 x i128> poison, i128 %a, i64 0
812  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer
813  ret <3 x i128> %c
814}
816
; Lane-0 splat of the <3 x i128> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect only general-purpose register copies:
; x2/x4 from x0 and x3/x5 from x1.
816define <3 x i128> @duplane0_v3i128(<3 x i128> %b) {
817; CHECK-LABEL: duplane0_v3i128:
818; CHECK:       // %bb.0: // %entry
819; CHECK-NEXT:    mov x2, x0
820; CHECK-NEXT:    mov x3, x1
821; CHECK-NEXT:    mov x4, x0
822; CHECK-NEXT:    mov x5, x1
823; CHECK-NEXT:    ret
824entry:
825  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer
826  ret <3 x i128> %c
827}
829
; Load-and-splat to <3 x i128>: an i128 is loaded from %p, placed in lane 0 of
; a poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldp x2, x1' plus copies into x0, x3, x4 and x5; the
; -global-isel invocation expects 'ldr q0', the upper half moved to d1, and
; 'fmov' copies of d0 into x0/x2/x4 and of d1 into x1/x3/x5.
829define <3 x i128> @loaddup_v3i128(ptr %p) {
830; CHECK-SD-LABEL: loaddup_v3i128:
831; CHECK-SD:       // %bb.0: // %entry
832; CHECK-SD-NEXT:    ldp x2, x1, [x0]
833; CHECK-SD-NEXT:    mov x0, x2
834; CHECK-SD-NEXT:    mov x3, x1
835; CHECK-SD-NEXT:    mov x4, x2
836; CHECK-SD-NEXT:    mov x5, x1
837; CHECK-SD-NEXT:    ret
838;
839; CHECK-GI-LABEL: loaddup_v3i128:
840; CHECK-GI:       // %bb.0: // %entry
841; CHECK-GI-NEXT:    ldr q0, [x0]
842; CHECK-GI-NEXT:    mov d1, v0.d[1]
843; CHECK-GI-NEXT:    fmov x0, d0
844; CHECK-GI-NEXT:    fmov x2, d0
845; CHECK-GI-NEXT:    fmov x4, d0
846; CHECK-GI-NEXT:    fmov x1, d1
847; CHECK-GI-NEXT:    fmov x3, d1
848; CHECK-GI-NEXT:    fmov x5, d1
849; CHECK-GI-NEXT:    ret
850entry:
851  %a = load i128, ptr %p
852  %b = insertelement <3 x i128> poison, i128 %a, i64 0
853  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer
854  ret <3 x i128> %c
855}
857
; Scalar splat to <4 x i128>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect only
; general-purpose register copies: x2/x4/x6 from x0 and x3/x5/x7 from x1.
857define <4 x i128> @dup_v4i128(i128 %a) {
858; CHECK-LABEL: dup_v4i128:
859; CHECK:       // %bb.0: // %entry
860; CHECK-NEXT:    mov x2, x0
861; CHECK-NEXT:    mov x3, x1
862; CHECK-NEXT:    mov x4, x0
863; CHECK-NEXT:    mov x5, x1
864; CHECK-NEXT:    mov x6, x0
865; CHECK-NEXT:    mov x7, x1
866; CHECK-NEXT:    ret
867entry:
868  %b = insertelement <4 x i128> poison, i128 %a, i64 0
869  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer
870  ret <4 x i128> %c
871}
873
; Lane-0 splat of the <4 x i128> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect only general-purpose register copies:
; x2/x4/x6 from x0 and x3/x5/x7 from x1.
873define <4 x i128> @duplane0_v4i128(<4 x i128> %b) {
874; CHECK-LABEL: duplane0_v4i128:
875; CHECK:       // %bb.0: // %entry
876; CHECK-NEXT:    mov x2, x0
877; CHECK-NEXT:    mov x3, x1
878; CHECK-NEXT:    mov x4, x0
879; CHECK-NEXT:    mov x5, x1
880; CHECK-NEXT:    mov x6, x0
881; CHECK-NEXT:    mov x7, x1
882; CHECK-NEXT:    ret
883entry:
884  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer
885  ret <4 x i128> %c
886}
888
; Load-and-splat to <4 x i128>: an i128 is loaded from %p, placed in lane 0 of
; a poison vector and broadcast with an all-zero shuffle mask. The default
; invocation expects 'ldp x2, x1' plus copies into x0 and x3-x7; the
; -global-isel invocation expects 'ldr q0', the upper half moved to d1, and
; 'fmov' copies of d0 into x0/x2/x4/x6 and of d1 into x1/x3/x5/x7.
888define <4 x i128> @loaddup_v4i128(ptr %p) {
889; CHECK-SD-LABEL: loaddup_v4i128:
890; CHECK-SD:       // %bb.0: // %entry
891; CHECK-SD-NEXT:    ldp x2, x1, [x0]
892; CHECK-SD-NEXT:    mov x0, x2
893; CHECK-SD-NEXT:    mov x3, x1
894; CHECK-SD-NEXT:    mov x4, x2
895; CHECK-SD-NEXT:    mov x5, x1
896; CHECK-SD-NEXT:    mov x6, x2
897; CHECK-SD-NEXT:    mov x7, x1
898; CHECK-SD-NEXT:    ret
899;
900; CHECK-GI-LABEL: loaddup_v4i128:
901; CHECK-GI:       // %bb.0: // %entry
902; CHECK-GI-NEXT:    ldr q0, [x0]
903; CHECK-GI-NEXT:    mov d1, v0.d[1]
904; CHECK-GI-NEXT:    fmov x0, d0
905; CHECK-GI-NEXT:    fmov x2, d0
906; CHECK-GI-NEXT:    fmov x4, d0
907; CHECK-GI-NEXT:    fmov x6, d0
908; CHECK-GI-NEXT:    fmov x1, d1
909; CHECK-GI-NEXT:    fmov x3, d1
910; CHECK-GI-NEXT:    fmov x5, d1
911; CHECK-GI-NEXT:    fmov x7, d1
912; CHECK-GI-NEXT:    ret
913entry:
914  %a = load i128, ptr %p
915  %b = insertelement <4 x i128> poison, i128 %a, i64 0
916  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer
917  ret <4 x i128> %c
918}
920
; Scalar splat to <2 x half>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a
; lane-form 'dup v0.4h, v0.h[0]' (the scalar already arrives in h0).
920define <2 x half> @dup_v2half(half %a) {
921; CHECK-LABEL: dup_v2half:
922; CHECK:       // %bb.0: // %entry
923; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
924; CHECK-NEXT:    dup v0.4h, v0.h[0]
925; CHECK-NEXT:    ret
926entry:
927  %b = insertelement <2 x half> poison, half %a, i64 0
928  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
929  ret <2 x half> %c
930}
932
; Lane-0 splat of the <2 x half> argument %b (shufflevector with an all-zero
; mask). Both llc invocations expect 'dup v0.4h, v0.h[0]'.
932define <2 x half> @duplane0_v2half(<2 x half> %b) {
933; CHECK-LABEL: duplane0_v2half:
934; CHECK:       // %bb.0: // %entry
935; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
936; CHECK-NEXT:    dup v0.4h, v0.h[0]
937; CHECK-NEXT:    ret
938entry:
939  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
940  ret <2 x half> %c
941}
943
; Load-and-splat to <2 x half>: a half is loaded from %p, placed in lane 0 of a
; poison vector and broadcast with an all-zero shuffle mask. Both llc
; invocations expect a single 'ld1r { v0.4h }'.
943define <2 x half> @loaddup_v2half(ptr %p) {
944; CHECK-LABEL: loaddup_v2half:
945; CHECK:       // %bb.0: // %entry
946; CHECK-NEXT:    ld1r { v0.4h }, [x0]
947; CHECK-NEXT:    ret
948entry:
949  %a = load half, ptr %p
950  %b = insertelement <2 x half> poison, half %a, i64 0
951  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
952  ret <2 x half> %c
953}
955
; Scalar splat to <3 x half>: %a is inserted into lane 0 of a poison vector and
; broadcast with an all-zero shuffle mask. Both llc invocations expect a
; lane-form 'dup v0.4h, v0.h[0]' (the scalar already arrives in h0).
955define <3 x half> @dup_v3half(half %a) {
956; CHECK-LABEL: dup_v3half:
957; CHECK:       // %bb.0: // %entry
958; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
959; CHECK-NEXT:    dup v0.4h, v0.h[0]
960; CHECK-NEXT:    ret
961entry:
962  %b = insertelement <3 x half> poison, half %a, i64 0
963  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer
964  ret <3 x half> %c
965}
967
; Splat of lane 0 of a <3 x half> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4h
; arrangement.
968define <3 x half> @duplane0_v3half(<3 x half> %b) {
969; CHECK-LABEL: duplane0_v3half:
970; CHECK:       // %bb.0: // %entry
971; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
972; CHECK-NEXT:    dup v0.4h, v0.h[0]
973; CHECK-NEXT:    ret
974entry:
975  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer
976  ret <3 x half> %c
977}
978
; Splat of a half loaded from %p into <3 x half>: the loaded value is inserted
; into lane 0 and broadcast by an all-zero shuffle mask. Both RUN lines expect
; the load and splat to become one ld1r with the .4h arrangement.
979define <3 x half> @loaddup_v3half(ptr %p) {
980; CHECK-LABEL: loaddup_v3half:
981; CHECK:       // %bb.0: // %entry
982; CHECK-NEXT:    ld1r { v0.4h }, [x0]
983; CHECK-NEXT:    ret
984entry:
985  %a = load half, ptr %p
986  %b = insertelement <3 x half> poison, half %a, i64 0
987  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer
988  ret <3 x half> %c
989}
990
; Splat of a scalar half argument into <4 x half>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect a single lane-0 dup using the .4h arrangement.
991define <4 x half> @dup_v4half(half %a) {
992; CHECK-LABEL: dup_v4half:
993; CHECK:       // %bb.0: // %entry
994; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
995; CHECK-NEXT:    dup v0.4h, v0.h[0]
996; CHECK-NEXT:    ret
997entry:
998  %b = insertelement <4 x half> poison, half %a, i64 0
999  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
1000  ret <4 x half> %c
1001}
1002
; Splat of lane 0 of a <4 x half> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4h
; arrangement.
1003define <4 x half> @duplane0_v4half(<4 x half> %b) {
1004; CHECK-LABEL: duplane0_v4half:
1005; CHECK:       // %bb.0: // %entry
1006; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1007; CHECK-NEXT:    dup v0.4h, v0.h[0]
1008; CHECK-NEXT:    ret
1009entry:
1010  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
1011  ret <4 x half> %c
1012}
1013
; Splat of a half loaded from %p into <4 x half>: the loaded value is inserted
; into lane 0 and broadcast by an all-zero shuffle mask. Both RUN lines expect
; the load and splat to become one ld1r with the .4h arrangement.
1014define <4 x half> @loaddup_v4half(ptr %p) {
1015; CHECK-LABEL: loaddup_v4half:
1016; CHECK:       // %bb.0: // %entry
1017; CHECK-NEXT:    ld1r { v0.4h }, [x0]
1018; CHECK-NEXT:    ret
1019entry:
1020  %a = load half, ptr %p
1021  %b = insertelement <4 x half> poison, half %a, i64 0
1022  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
1023  ret <4 x half> %c
1024}
1025
; Splat of a scalar half argument into <8 x half>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect a single lane-0 dup using the .8h arrangement.
1026define <8 x half> @dup_v8half(half %a) {
1027; CHECK-LABEL: dup_v8half:
1028; CHECK:       // %bb.0: // %entry
1029; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
1030; CHECK-NEXT:    dup v0.8h, v0.h[0]
1031; CHECK-NEXT:    ret
1032entry:
1033  %b = insertelement <8 x half> poison, half %a, i64 0
1034  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
1035  ret <8 x half> %c
1036}
1037
; Splat of lane 0 of an <8 x half> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .8h
; arrangement.
1038define <8 x half> @duplane0_v8half(<8 x half> %b) {
1039; CHECK-LABEL: duplane0_v8half:
1040; CHECK:       // %bb.0: // %entry
1041; CHECK-NEXT:    dup v0.8h, v0.h[0]
1042; CHECK-NEXT:    ret
1043entry:
1044  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
1045  ret <8 x half> %c
1046}
1047
; Splat of a half loaded from %p into <8 x half>: the loaded value is inserted
; into lane 0 and broadcast by an all-zero shuffle mask. Both RUN lines expect
; the load and splat to become one ld1r with the .8h arrangement.
1048define <8 x half> @loaddup_v8half(ptr %p) {
1049; CHECK-LABEL: loaddup_v8half:
1050; CHECK:       // %bb.0: // %entry
1051; CHECK-NEXT:    ld1r { v0.8h }, [x0]
1052; CHECK-NEXT:    ret
1053entry:
1054  %a = load half, ptr %p
1055  %b = insertelement <8 x half> poison, half %a, i64 0
1056  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
1057  ret <8 x half> %c
1058}
1059
; Splat of a scalar half argument into <16 x half>, whose expected result
; occupies v0 and v1. The default run expects one .8h dup into v0 plus a
; register copy into v1; the -global-isel run expects two dups (into v2 and
; v1) followed by a copy of v2 into v0.
1060define <16 x half> @dup_v16half(half %a) {
1061; CHECK-SD-LABEL: dup_v16half:
1062; CHECK-SD:       // %bb.0: // %entry
1063; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
1064; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
1065; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1066; CHECK-SD-NEXT:    ret
1067;
1068; CHECK-GI-LABEL: dup_v16half:
1069; CHECK-GI:       // %bb.0: // %entry
1070; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
1071; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
1072; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
1073; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1074; CHECK-GI-NEXT:    ret
1075entry:
1076  %b = insertelement <16 x half> poison, half %a, i64 0
1077  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
1078  ret <16 x half> %c
1079}
1080
; Splat of lane 0 of a <16 x half> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a lane-0 .8h dup into v0 followed by a
; copy of v0 into v1.
1081define <16 x half> @duplane0_v16half(<16 x half> %b) {
1082; CHECK-LABEL: duplane0_v16half:
1083; CHECK:       // %bb.0: // %entry
1084; CHECK-NEXT:    dup v0.8h, v0.h[0]
1085; CHECK-NEXT:    mov v1.16b, v0.16b
1086; CHECK-NEXT:    ret
1087entry:
1088  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
1089  ret <16 x half> %c
1090}
1091
; Splat of a half loaded from %p into <16 x half>, whose expected result
; occupies v0 and v1. The default run expects one .8h ld1r into v0 plus a
; register copy into v1; the -global-isel run expects two ld1r loads from
; [x0], one per result register.
1092define <16 x half> @loaddup_v16half(ptr %p) {
1093; CHECK-SD-LABEL: loaddup_v16half:
1094; CHECK-SD:       // %bb.0: // %entry
1095; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
1096; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1097; CHECK-SD-NEXT:    ret
1098;
1099; CHECK-GI-LABEL: loaddup_v16half:
1100; CHECK-GI:       // %bb.0: // %entry
1101; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
1102; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
1103; CHECK-GI-NEXT:    ret
1104entry:
1105  %a = load half, ptr %p
1106  %b = insertelement <16 x half> poison, half %a, i64 0
1107  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
1108  ret <16 x half> %c
1109}
1110
; Splat of a scalar bfloat argument into <2 x bfloat>: %a is inserted into
; lane 0 of a poison vector and an all-zero shuffle mask copies it to every
; lane. Both RUN lines expect a single lane-0 dup using the .4h arrangement.
1111define <2 x bfloat> @dup_v2bfloat(bfloat %a) {
1112; CHECK-LABEL: dup_v2bfloat:
1113; CHECK:       // %bb.0: // %entry
1114; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
1115; CHECK-NEXT:    dup v0.4h, v0.h[0]
1116; CHECK-NEXT:    ret
1117entry:
1118  %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0
1119  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer
1120  ret <2 x bfloat> %c
1121}
1122
; Splat of lane 0 of a <2 x bfloat> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4h
; arrangement.
1123define <2 x bfloat> @duplane0_v2bfloat(<2 x bfloat> %b) {
1124; CHECK-LABEL: duplane0_v2bfloat:
1125; CHECK:       // %bb.0: // %entry
1126; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1127; CHECK-NEXT:    dup v0.4h, v0.h[0]
1128; CHECK-NEXT:    ret
1129entry:
1130  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer
1131  ret <2 x bfloat> %c
1132}
1133
; Splat of a bfloat loaded from %p into <2 x bfloat>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .4h arrangement.
1134define <2 x bfloat> @loaddup_v2bfloat(ptr %p) {
1135; CHECK-LABEL: loaddup_v2bfloat:
1136; CHECK:       // %bb.0: // %entry
1137; CHECK-NEXT:    ld1r { v0.4h }, [x0]
1138; CHECK-NEXT:    ret
1139entry:
1140  %a = load bfloat, ptr %p
1141  %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0
1142  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer
1143  ret <2 x bfloat> %c
1144}
1145
; Splat of a scalar bfloat argument into <3 x bfloat>: %a is inserted into
; lane 0 of a poison vector and an all-zero shuffle mask copies it to every
; lane. Both RUN lines expect a single lane-0 dup using the .4h arrangement.
1146define <3 x bfloat> @dup_v3bfloat(bfloat %a) {
1147; CHECK-LABEL: dup_v3bfloat:
1148; CHECK:       // %bb.0: // %entry
1149; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
1150; CHECK-NEXT:    dup v0.4h, v0.h[0]
1151; CHECK-NEXT:    ret
1152entry:
1153  %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0
1154  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer
1155  ret <3 x bfloat> %c
1156}
1157
; Splat of lane 0 of a <3 x bfloat> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4h
; arrangement.
1158define <3 x bfloat> @duplane0_v3bfloat(<3 x bfloat> %b) {
1159; CHECK-LABEL: duplane0_v3bfloat:
1160; CHECK:       // %bb.0: // %entry
1161; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1162; CHECK-NEXT:    dup v0.4h, v0.h[0]
1163; CHECK-NEXT:    ret
1164entry:
1165  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer
1166  ret <3 x bfloat> %c
1167}
1168
; Splat of a bfloat loaded from %p into <3 x bfloat>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .4h arrangement.
1169define <3 x bfloat> @loaddup_v3bfloat(ptr %p) {
1170; CHECK-LABEL: loaddup_v3bfloat:
1171; CHECK:       // %bb.0: // %entry
1172; CHECK-NEXT:    ld1r { v0.4h }, [x0]
1173; CHECK-NEXT:    ret
1174entry:
1175  %a = load bfloat, ptr %p
1176  %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0
1177  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer
1178  ret <3 x bfloat> %c
1179}
1180
; Splat of a scalar bfloat argument into <4 x bfloat>: %a is inserted into
; lane 0 of a poison vector and an all-zero shuffle mask copies it to every
; lane. Both RUN lines expect a single lane-0 dup using the .4h arrangement.
1181define <4 x bfloat> @dup_v4bfloat(bfloat %a) {
1182; CHECK-LABEL: dup_v4bfloat:
1183; CHECK:       // %bb.0: // %entry
1184; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
1185; CHECK-NEXT:    dup v0.4h, v0.h[0]
1186; CHECK-NEXT:    ret
1187entry:
1188  %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0
1189  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer
1190  ret <4 x bfloat> %c
1191}
1192
; Splat of lane 0 of a <4 x bfloat> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4h
; arrangement.
1193define <4 x bfloat> @duplane0_v4bfloat(<4 x bfloat> %b) {
1194; CHECK-LABEL: duplane0_v4bfloat:
1195; CHECK:       // %bb.0: // %entry
1196; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1197; CHECK-NEXT:    dup v0.4h, v0.h[0]
1198; CHECK-NEXT:    ret
1199entry:
1200  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer
1201  ret <4 x bfloat> %c
1202}
1203
; Splat of a bfloat loaded from %p into <4 x bfloat>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .4h arrangement.
1204define <4 x bfloat> @loaddup_v4bfloat(ptr %p) {
1205; CHECK-LABEL: loaddup_v4bfloat:
1206; CHECK:       // %bb.0: // %entry
1207; CHECK-NEXT:    ld1r { v0.4h }, [x0]
1208; CHECK-NEXT:    ret
1209entry:
1210  %a = load bfloat, ptr %p
1211  %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0
1212  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer
1213  ret <4 x bfloat> %c
1214}
1215
; Splat of a scalar bfloat argument into <8 x bfloat>: %a is inserted into
; lane 0 of a poison vector and an all-zero shuffle mask copies it to every
; lane. Both RUN lines expect a single lane-0 dup using the .8h arrangement.
1216define <8 x bfloat> @dup_v8bfloat(bfloat %a) {
1217; CHECK-LABEL: dup_v8bfloat:
1218; CHECK:       // %bb.0: // %entry
1219; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
1220; CHECK-NEXT:    dup v0.8h, v0.h[0]
1221; CHECK-NEXT:    ret
1222entry:
1223  %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0
1224  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
1225  ret <8 x bfloat> %c
1226}
1227
; Splat of lane 0 of an <8 x bfloat> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .8h
; arrangement.
1228define <8 x bfloat> @duplane0_v8bfloat(<8 x bfloat> %b) {
1229; CHECK-LABEL: duplane0_v8bfloat:
1230; CHECK:       // %bb.0: // %entry
1231; CHECK-NEXT:    dup v0.8h, v0.h[0]
1232; CHECK-NEXT:    ret
1233entry:
1234  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
1235  ret <8 x bfloat> %c
1236}
1237
; Splat of a bfloat loaded from %p into <8 x bfloat>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .8h arrangement.
1238define <8 x bfloat> @loaddup_v8bfloat(ptr %p) {
1239; CHECK-LABEL: loaddup_v8bfloat:
1240; CHECK:       // %bb.0: // %entry
1241; CHECK-NEXT:    ld1r { v0.8h }, [x0]
1242; CHECK-NEXT:    ret
1243entry:
1244  %a = load bfloat, ptr %p
1245  %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0
1246  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
1247  ret <8 x bfloat> %c
1248}
1249
; Splat of a scalar bfloat argument into <16 x bfloat>, whose expected result
; occupies v0 and v1. The default run expects one .8h dup into v0 plus a
; register copy into v1; the -global-isel run expects two dups (into v2 and
; v1) followed by a copy of v2 into v0.
1250define <16 x bfloat> @dup_v16bfloat(bfloat %a) {
1251; CHECK-SD-LABEL: dup_v16bfloat:
1252; CHECK-SD:       // %bb.0: // %entry
1253; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
1254; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
1255; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1256; CHECK-SD-NEXT:    ret
1257;
1258; CHECK-GI-LABEL: dup_v16bfloat:
1259; CHECK-GI:       // %bb.0: // %entry
1260; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
1261; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
1262; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
1263; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1264; CHECK-GI-NEXT:    ret
1265entry:
1266  %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0
1267  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer
1268  ret <16 x bfloat> %c
1269}
1270
; Splat of lane 0 of a <16 x bfloat> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a lane-0 .8h dup into v0 followed by a
; copy of v0 into v1.
1271define <16 x bfloat> @duplane0_v16bfloat(<16 x bfloat> %b) {
1272; CHECK-LABEL: duplane0_v16bfloat:
1273; CHECK:       // %bb.0: // %entry
1274; CHECK-NEXT:    dup v0.8h, v0.h[0]
1275; CHECK-NEXT:    mov v1.16b, v0.16b
1276; CHECK-NEXT:    ret
1277entry:
1278  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer
1279  ret <16 x bfloat> %c
1280}
1281
; Splat of a bfloat loaded from %p into <16 x bfloat>, whose expected result
; occupies v0 and v1. The default run expects one .8h ld1r into v0 plus a
; register copy into v1; the -global-isel run expects two ld1r loads from
; [x0], one per result register.
1282define <16 x bfloat> @loaddup_v16bfloat(ptr %p) {
1283; CHECK-SD-LABEL: loaddup_v16bfloat:
1284; CHECK-SD:       // %bb.0: // %entry
1285; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
1286; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1287; CHECK-SD-NEXT:    ret
1288;
1289; CHECK-GI-LABEL: loaddup_v16bfloat:
1290; CHECK-GI:       // %bb.0: // %entry
1291; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
1292; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
1293; CHECK-GI-NEXT:    ret
1294entry:
1295  %a = load bfloat, ptr %p
1296  %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0
1297  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer
1298  ret <16 x bfloat> %c
1299}
1300
; Splat of a scalar float argument into <2 x float>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect a single lane-0 dup using the .2s arrangement.
1301define <2 x float> @dup_v2float(float %a) {
1302; CHECK-LABEL: dup_v2float:
1303; CHECK:       // %bb.0: // %entry
1304; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1305; CHECK-NEXT:    dup v0.2s, v0.s[0]
1306; CHECK-NEXT:    ret
1307entry:
1308  %b = insertelement <2 x float> poison, float %a, i64 0
1309  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
1310  ret <2 x float> %c
1311}
1312
; Splat of lane 0 of a <2 x float> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .2s
; arrangement.
1313define <2 x float> @duplane0_v2float(<2 x float> %b) {
1314; CHECK-LABEL: duplane0_v2float:
1315; CHECK:       // %bb.0: // %entry
1316; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1317; CHECK-NEXT:    dup v0.2s, v0.s[0]
1318; CHECK-NEXT:    ret
1319entry:
1320  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
1321  ret <2 x float> %c
1322}
1323
; Splat of a float loaded from %p into <2 x float>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .2s arrangement.
1324define <2 x float> @loaddup_v2float(ptr %p) {
1325; CHECK-LABEL: loaddup_v2float:
1326; CHECK:       // %bb.0: // %entry
1327; CHECK-NEXT:    ld1r { v0.2s }, [x0]
1328; CHECK-NEXT:    ret
1329entry:
1330  %a = load float, ptr %p
1331  %b = insertelement <2 x float> poison, float %a, i64 0
1332  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
1333  ret <2 x float> %c
1334}
1335
; Splat of a scalar float argument into <3 x float>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect a single lane-0 dup using the .4s arrangement.
1336define <3 x float> @dup_v3float(float %a) {
1337; CHECK-LABEL: dup_v3float:
1338; CHECK:       // %bb.0: // %entry
1339; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1340; CHECK-NEXT:    dup v0.4s, v0.s[0]
1341; CHECK-NEXT:    ret
1342entry:
1343  %b = insertelement <3 x float> poison, float %a, i64 0
1344  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer
1345  ret <3 x float> %c
1346}
1347
; Splat of lane 0 of a <3 x float> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4s
; arrangement.
1348define <3 x float> @duplane0_v3float(<3 x float> %b) {
1349; CHECK-LABEL: duplane0_v3float:
1350; CHECK:       // %bb.0: // %entry
1351; CHECK-NEXT:    dup v0.4s, v0.s[0]
1352; CHECK-NEXT:    ret
1353entry:
1354  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer
1355  ret <3 x float> %c
1356}
1357
; Splat of a float loaded from %p into <3 x float>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .4s arrangement.
1358define <3 x float> @loaddup_v3float(ptr %p) {
1359; CHECK-LABEL: loaddup_v3float:
1360; CHECK:       // %bb.0: // %entry
1361; CHECK-NEXT:    ld1r { v0.4s }, [x0]
1362; CHECK-NEXT:    ret
1363entry:
1364  %a = load float, ptr %p
1365  %b = insertelement <3 x float> poison, float %a, i64 0
1366  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer
1367  ret <3 x float> %c
1368}
1369
; Splat of a scalar float argument into <4 x float>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect a single lane-0 dup using the .4s arrangement.
1370define <4 x float> @dup_v4float(float %a) {
1371; CHECK-LABEL: dup_v4float:
1372; CHECK:       // %bb.0: // %entry
1373; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1374; CHECK-NEXT:    dup v0.4s, v0.s[0]
1375; CHECK-NEXT:    ret
1376entry:
1377  %b = insertelement <4 x float> poison, float %a, i64 0
1378  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1379  ret <4 x float> %c
1380}
1381
; Splat of lane 0 of a <4 x float> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .4s
; arrangement.
1382define <4 x float> @duplane0_v4float(<4 x float> %b) {
1383; CHECK-LABEL: duplane0_v4float:
1384; CHECK:       // %bb.0: // %entry
1385; CHECK-NEXT:    dup v0.4s, v0.s[0]
1386; CHECK-NEXT:    ret
1387entry:
1388  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1389  ret <4 x float> %c
1390}
1391
; Splat of a float loaded from %p into <4 x float>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .4s arrangement.
1392define <4 x float> @loaddup_v4float(ptr %p) {
1393; CHECK-LABEL: loaddup_v4float:
1394; CHECK:       // %bb.0: // %entry
1395; CHECK-NEXT:    ld1r { v0.4s }, [x0]
1396; CHECK-NEXT:    ret
1397entry:
1398  %a = load float, ptr %p
1399  %b = insertelement <4 x float> poison, float %a, i64 0
1400  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1401  ret <4 x float> %c
1402}
1403
; Splat of a scalar float argument into <8 x float>, whose expected result
; occupies v0 and v1. The default run expects one .4s dup into v0 plus a
; register copy into v1; the -global-isel run expects two dups (into v2 and
; v1) followed by a copy of v2 into v0.
1404define <8 x float> @dup_v8float(float %a) {
1405; CHECK-SD-LABEL: dup_v8float:
1406; CHECK-SD:       // %bb.0: // %entry
1407; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1408; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
1409; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1410; CHECK-SD-NEXT:    ret
1411;
1412; CHECK-GI-LABEL: dup_v8float:
1413; CHECK-GI:       // %bb.0: // %entry
1414; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1415; CHECK-GI-NEXT:    dup v2.4s, v0.s[0]
1416; CHECK-GI-NEXT:    dup v1.4s, v0.s[0]
1417; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1418; CHECK-GI-NEXT:    ret
1419entry:
1420  %b = insertelement <8 x float> poison, float %a, i64 0
1421  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
1422  ret <8 x float> %c
1423}
1424
; Splat of lane 0 of an <8 x float> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a lane-0 .4s dup into v0 followed by a
; copy of v0 into v1.
1425define <8 x float> @duplane0_v8float(<8 x float> %b) {
1426; CHECK-LABEL: duplane0_v8float:
1427; CHECK:       // %bb.0: // %entry
1428; CHECK-NEXT:    dup v0.4s, v0.s[0]
1429; CHECK-NEXT:    mov v1.16b, v0.16b
1430; CHECK-NEXT:    ret
1431entry:
1432  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
1433  ret <8 x float> %c
1434}
1435
; Splat of a float loaded from %p into <8 x float>, whose expected result
; occupies v0 and v1. The default run expects one .4s ld1r into v0 plus a
; register copy into v1; the -global-isel run expects two ld1r loads from
; [x0], one per result register.
1436define <8 x float> @loaddup_v8float(ptr %p) {
1437; CHECK-SD-LABEL: loaddup_v8float:
1438; CHECK-SD:       // %bb.0: // %entry
1439; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
1440; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1441; CHECK-SD-NEXT:    ret
1442;
1443; CHECK-GI-LABEL: loaddup_v8float:
1444; CHECK-GI:       // %bb.0: // %entry
1445; CHECK-GI-NEXT:    ld1r { v0.4s }, [x0]
1446; CHECK-GI-NEXT:    ld1r { v1.4s }, [x0]
1447; CHECK-GI-NEXT:    ret
1448entry:
1449  %a = load float, ptr %p
1450  %b = insertelement <8 x float> poison, float %a, i64 0
1451  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
1452  ret <8 x float> %c
1453}
1454
; Splat of a scalar double argument into <2 x double>: %a is inserted into
; lane 0 of a poison vector and an all-zero shuffle mask copies it to every
; lane. Both RUN lines expect a single lane-0 dup using the .2d arrangement.
1455define <2 x double> @dup_v2double(double %a) {
1456; CHECK-LABEL: dup_v2double:
1457; CHECK:       // %bb.0: // %entry
1458; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1459; CHECK-NEXT:    dup v0.2d, v0.d[0]
1460; CHECK-NEXT:    ret
1461entry:
1462  %b = insertelement <2 x double> poison, double %a, i64 0
1463  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1464  ret <2 x double> %c
1465}
1466
; Splat of lane 0 of a <2 x double> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a single lane-0 dup using the .2d
; arrangement.
1467define <2 x double> @duplane0_v2double(<2 x double> %b) {
1468; CHECK-LABEL: duplane0_v2double:
1469; CHECK:       // %bb.0: // %entry
1470; CHECK-NEXT:    dup v0.2d, v0.d[0]
1471; CHECK-NEXT:    ret
1472entry:
1473  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1474  ret <2 x double> %c
1475}
1476
; Splat of a double loaded from %p into <2 x double>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect the load and splat to become one ld1r with the .2d arrangement.
1477define <2 x double> @loaddup_v2double(ptr %p) {
1478; CHECK-LABEL: loaddup_v2double:
1479; CHECK:       // %bb.0: // %entry
1480; CHECK-NEXT:    ld1r { v0.2d }, [x0]
1481; CHECK-NEXT:    ret
1482entry:
1483  %a = load double, ptr %p
1484  %b = insertelement <2 x double> poison, double %a, i64 0
1485  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1486  ret <2 x double> %c
1487}
1488
; Splat of a scalar double argument into <3 x double>, whose expected result
; occupies d0, d1 and d2. The default run expects two scalar fmov copies of d0
; and no vector dup; the -global-isel run expects two .2d dups (into v3 and
; v2), with d1 taken from lane 1 of v3 and d0 copied from d3.
1489define <3 x double> @dup_v3double(double %a) {
1490; CHECK-SD-LABEL: dup_v3double:
1491; CHECK-SD:       // %bb.0: // %entry
1492; CHECK-SD-NEXT:    fmov d1, d0
1493; CHECK-SD-NEXT:    fmov d2, d0
1494; CHECK-SD-NEXT:    ret
1495;
1496; CHECK-GI-LABEL: dup_v3double:
1497; CHECK-GI:       // %bb.0: // %entry
1498; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1499; CHECK-GI-NEXT:    dup v3.2d, v0.d[0]
1500; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
1501; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
1502; CHECK-GI-NEXT:    mov d1, v3.d[1]
1503; CHECK-GI-NEXT:    fmov d0, d3
1504; CHECK-GI-NEXT:    ret
1505entry:
1506  %b = insertelement <3 x double> poison, double %a, i64 0
1507  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
1508  ret <3 x double> %c
1509}
1510
; Splat of lane 0 of a <3 x double> argument via a shufflevector with an
; all-zero mask. The default run expects two scalar fmov copies of d0 into d1
; and d2; the -global-isel run expects d0 and d1 to be packed into v2 first,
; then a lane-0 .2d dup into v0 with d1 taken from lane 1 of that result.
1511define <3 x double> @duplane0_v3double(<3 x double> %b) {
1512; CHECK-SD-LABEL: duplane0_v3double:
1513; CHECK-SD:       // %bb.0: // %entry
1514; CHECK-SD-NEXT:    fmov d1, d0
1515; CHECK-SD-NEXT:    fmov d2, d0
1516; CHECK-SD-NEXT:    ret
1517;
1518; CHECK-GI-LABEL: duplane0_v3double:
1519; CHECK-GI:       // %bb.0: // %entry
1520; CHECK-GI-NEXT:    fmov d2, d0
1521; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
1522; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
1523; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
1524; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
1525; CHECK-GI-NEXT:    mov d1, v0.d[1]
1526; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1527; CHECK-GI-NEXT:    ret
1528entry:
1529  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
1530  ret <3 x double> %c
1531}
1532
; Splat of a double loaded from %p into <3 x double>, whose expected result
; occupies d0, d1 and d2. The default run expects a scalar ldr into d0 plus
; two fmov copies; the -global-isel run expects two .2d ld1r loads from [x0]
; (into v0 and v2), with d1 taken from lane 1 of v0.
1533define <3 x double> @loaddup_v3double(ptr %p) {
1534; CHECK-SD-LABEL: loaddup_v3double:
1535; CHECK-SD:       // %bb.0: // %entry
1536; CHECK-SD-NEXT:    ldr d0, [x0]
1537; CHECK-SD-NEXT:    fmov d1, d0
1538; CHECK-SD-NEXT:    fmov d2, d0
1539; CHECK-SD-NEXT:    ret
1540;
1541; CHECK-GI-LABEL: loaddup_v3double:
1542; CHECK-GI:       // %bb.0: // %entry
1543; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
1544; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
1545; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
1546; CHECK-GI-NEXT:    mov d1, v0.d[1]
1547; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1548; CHECK-GI-NEXT:    ret
1549entry:
1550  %a = load double, ptr %p
1551  %b = insertelement <3 x double> poison, double %a, i64 0
1552  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
1553  ret <3 x double> %c
1554}
1555
; Splat of a scalar double argument into <4 x double>, whose expected result
; occupies v0 and v1. The default run expects one .2d dup into v0 plus a
; register copy into v1; the -global-isel run expects two dups (into v2 and
; v1) followed by a copy of v2 into v0.
1556define <4 x double> @dup_v4double(double %a) {
1557; CHECK-SD-LABEL: dup_v4double:
1558; CHECK-SD:       // %bb.0: // %entry
1559; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1560; CHECK-SD-NEXT:    dup v0.2d, v0.d[0]
1561; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1562; CHECK-SD-NEXT:    ret
1563;
1564; CHECK-GI-LABEL: dup_v4double:
1565; CHECK-GI:       // %bb.0: // %entry
1566; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1567; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
1568; CHECK-GI-NEXT:    dup v1.2d, v0.d[0]
1569; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1570; CHECK-GI-NEXT:    ret
1571entry:
1572  %b = insertelement <4 x double> poison, double %a, i64 0
1573  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer
1574  ret <4 x double> %c
1575}
1576
; Splat of lane 0 of a <4 x double> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect a lane-0 .2d dup into v0 followed by a
; copy of v0 into v1.
1577define <4 x double> @duplane0_v4double(<4 x double> %b) {
1578; CHECK-LABEL: duplane0_v4double:
1579; CHECK:       // %bb.0: // %entry
1580; CHECK-NEXT:    dup v0.2d, v0.d[0]
1581; CHECK-NEXT:    mov v1.16b, v0.16b
1582; CHECK-NEXT:    ret
1583entry:
1584  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer
1585  ret <4 x double> %c
1586}
1587
; Splat of a double loaded from %p into <4 x double>, whose expected result
; occupies v0 and v1. The default run expects one .2d ld1r into v0 plus a
; register copy into v1; the -global-isel run expects two ld1r loads from
; [x0], one per result register.
1588define <4 x double> @loaddup_v4double(ptr %p) {
1589; CHECK-SD-LABEL: loaddup_v4double:
1590; CHECK-SD:       // %bb.0: // %entry
1591; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
1592; CHECK-SD-NEXT:    mov v1.16b, v0.16b
1593; CHECK-SD-NEXT:    ret
1594;
1595; CHECK-GI-LABEL: loaddup_v4double:
1596; CHECK-GI:       // %bb.0: // %entry
1597; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
1598; CHECK-GI-NEXT:    ld1r { v1.2d }, [x0]
1599; CHECK-GI-NEXT:    ret
1600entry:
1601  %a = load double, ptr %p
1602  %b = insertelement <4 x double> poison, double %a, i64 0
1603  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer
1604  ret <4 x double> %c
1605}
1606
; Splat of a scalar fp128 argument into <2 x fp128>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect only a full-register copy of v0 into v1, with no dup
; instruction.
1607define <2 x fp128> @dup_v2fp128(fp128 %a) {
1608; CHECK-LABEL: dup_v2fp128:
1609; CHECK:       // %bb.0: // %entry
1610; CHECK-NEXT:    mov v1.16b, v0.16b
1611; CHECK-NEXT:    ret
1612entry:
1613  %b = insertelement <2 x fp128> poison, fp128 %a, i64 0
1614  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer
1615  ret <2 x fp128> %c
1616}
1617
; Splat of lane 0 of a <2 x fp128> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect only a full-register copy of v0 into
; v1, with no dup instruction.
1618define <2 x fp128> @duplane0_v2fp128(<2 x fp128> %b) {
1619; CHECK-LABEL: duplane0_v2fp128:
1620; CHECK:       // %bb.0: // %entry
1621; CHECK-NEXT:    mov v1.16b, v0.16b
1622; CHECK-NEXT:    ret
1623entry:
1624  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer
1625  ret <2 x fp128> %c
1626}
1627
; Splat of an fp128 loaded from %p into <2 x fp128>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect a single q-register load into q0 followed by a copy of v0 into
; v1.
1628define <2 x fp128> @loaddup_v2fp128(ptr %p) {
1629; CHECK-LABEL: loaddup_v2fp128:
1630; CHECK:       // %bb.0: // %entry
1631; CHECK-NEXT:    ldr q0, [x0]
1632; CHECK-NEXT:    mov v1.16b, v0.16b
1633; CHECK-NEXT:    ret
1634entry:
1635  %a = load fp128, ptr %p
1636  %b = insertelement <2 x fp128> poison, fp128 %a, i64 0
1637  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer
1638  ret <2 x fp128> %c
1639}
1640
; Splat of a scalar fp128 argument into <3 x fp128>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect full-register copies of v0 into v1 and v2, with no dup
; instruction.
1641define <3 x fp128> @dup_v3fp128(fp128 %a) {
1642; CHECK-LABEL: dup_v3fp128:
1643; CHECK:       // %bb.0: // %entry
1644; CHECK-NEXT:    mov v1.16b, v0.16b
1645; CHECK-NEXT:    mov v2.16b, v0.16b
1646; CHECK-NEXT:    ret
1647entry:
1648  %b = insertelement <3 x fp128> poison, fp128 %a, i64 0
1649  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer
1650  ret <3 x fp128> %c
1651}
1652
; Splat of lane 0 of a <3 x fp128> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect full-register copies of v0 into v1 and
; v2, with no dup instruction.
1653define <3 x fp128> @duplane0_v3fp128(<3 x fp128> %b) {
1654; CHECK-LABEL: duplane0_v3fp128:
1655; CHECK:       // %bb.0: // %entry
1656; CHECK-NEXT:    mov v1.16b, v0.16b
1657; CHECK-NEXT:    mov v2.16b, v0.16b
1658; CHECK-NEXT:    ret
1659entry:
1660  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer
1661  ret <3 x fp128> %c
1662}
1663
; Splat of an fp128 loaded from %p into <3 x fp128>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect a single q-register load into q0 followed by copies of v0 into
; v1 and v2.
1664define <3 x fp128> @loaddup_v3fp128(ptr %p) {
1665; CHECK-LABEL: loaddup_v3fp128:
1666; CHECK:       // %bb.0: // %entry
1667; CHECK-NEXT:    ldr q0, [x0]
1668; CHECK-NEXT:    mov v1.16b, v0.16b
1669; CHECK-NEXT:    mov v2.16b, v0.16b
1670; CHECK-NEXT:    ret
1671entry:
1672  %a = load fp128, ptr %p
1673  %b = insertelement <3 x fp128> poison, fp128 %a, i64 0
1674  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer
1675  ret <3 x fp128> %c
1676}
1677
; Splat of a scalar fp128 argument into <4 x fp128>: %a is inserted into lane 0
; of a poison vector and an all-zero shuffle mask copies it to every lane.
; Both RUN lines expect full-register copies of v0 into v1, v2 and v3, with no
; dup instruction.
1678define <4 x fp128> @dup_v4fp128(fp128 %a) {
1679; CHECK-LABEL: dup_v4fp128:
1680; CHECK:       // %bb.0: // %entry
1681; CHECK-NEXT:    mov v1.16b, v0.16b
1682; CHECK-NEXT:    mov v2.16b, v0.16b
1683; CHECK-NEXT:    mov v3.16b, v0.16b
1684; CHECK-NEXT:    ret
1685entry:
1686  %b = insertelement <4 x fp128> poison, fp128 %a, i64 0
1687  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
1688  ret <4 x fp128> %c
1689}
1690
; Splat of lane 0 of a <4 x fp128> argument via a shufflevector with an
; all-zero mask. Both RUN lines expect full-register copies of v0 into v1, v2
; and v3, with no dup instruction.
1691define <4 x fp128> @duplane0_v4fp128(<4 x fp128> %b) {
1692; CHECK-LABEL: duplane0_v4fp128:
1693; CHECK:       // %bb.0: // %entry
1694; CHECK-NEXT:    mov v1.16b, v0.16b
1695; CHECK-NEXT:    mov v2.16b, v0.16b
1696; CHECK-NEXT:    mov v3.16b, v0.16b
1697; CHECK-NEXT:    ret
1698entry:
1699  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
1700  ret <4 x fp128> %c
1701}
1702
; Splat of an fp128 loaded from %p into <4 x fp128>: the loaded value is
; inserted into lane 0 and broadcast by an all-zero shuffle mask. Both RUN
; lines expect a single q-register load into q0 followed by copies of v0 into
; v1, v2 and v3.
1703define <4 x fp128> @loaddup_v4fp128(ptr %p) {
1704; CHECK-LABEL: loaddup_v4fp128:
1705; CHECK:       // %bb.0: // %entry
1706; CHECK-NEXT:    ldr q0, [x0]
1707; CHECK-NEXT:    mov v1.16b, v0.16b
1708; CHECK-NEXT:    mov v2.16b, v0.16b
1709; CHECK-NEXT:    mov v3.16b, v0.16b
1710; CHECK-NEXT:    ret
1711entry:
1712  %a = load fp128, ptr %p
1713  %b = insertelement <4 x fp128> poison, fp128 %a, i64 0
1714  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
1715  ret <4 x fp128> %c
1716}
1717