xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-insert-element.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
3
; Insert the constant i8 30 into lane 0 of a <vscale x 16 x i8> vector.
; Expected code: a vl1 predicate (first element only) plus a merging mov from
; w8, so the remaining lanes of z0 keep their incoming values.
4define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
5; CHECK-LABEL: test_lane0_16xi8:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    ptrue p0.b, vl1
8; CHECK-NEXT:    mov w8, #30 // =0x1e
9; CHECK-NEXT:    mov z0.b, p0/m, w8
10; CHECK-NEXT:    ret
11  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
12  ret <vscale x 16 x i8> %b
13}
14
; Insert the constant i16 30 into lane 0 of a <vscale x 8 x i16> vector.
; Expected code: vl1 halfword predicate plus a merging mov from w8.
15define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
16; CHECK-LABEL: test_lane0_8xi16:
17; CHECK:       // %bb.0:
18; CHECK-NEXT:    ptrue p0.h, vl1
19; CHECK-NEXT:    mov w8, #30 // =0x1e
20; CHECK-NEXT:    mov z0.h, p0/m, w8
21; CHECK-NEXT:    ret
22  %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
23  ret <vscale x 8 x i16> %b
24}
25
; Insert the constant i32 30 into lane 0 of a <vscale x 4 x i32> vector.
; Expected code: vl1 word predicate plus a merging mov from w8.
26define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
27; CHECK-LABEL: test_lane0_4xi32:
28; CHECK:       // %bb.0:
29; CHECK-NEXT:    ptrue p0.s, vl1
30; CHECK-NEXT:    mov w8, #30 // =0x1e
31; CHECK-NEXT:    mov z0.s, p0/m, w8
32; CHECK-NEXT:    ret
33  %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
34  ret <vscale x 4 x i32> %b
35}
36
; Insert the constant i64 30 into lane 0 of a <vscale x 2 x i64> vector.
; Expected code: vl1 doubleword predicate plus a merging mov. The constant is
; materialised through w8 while the merging mov reads the full x8.
37define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
38; CHECK-LABEL: test_lane0_2xi64:
39; CHECK:       // %bb.0:
40; CHECK-NEXT:    ptrue p0.d, vl1
41; CHECK-NEXT:    mov w8, #30 // =0x1e
42; CHECK-NEXT:    mov z0.d, p0/m, x8
43; CHECK-NEXT:    ret
44  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
45  ret <vscale x 2 x i64> %b
46}
47
; Insert the constant double 1.0 into lane 0 of a <vscale x 2 x double> vector.
; Expected code: the constant is built in d1 with fmov, then copied into lane 0
; of z0 by a vl1-predicated merging vector mov.
48define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
49; CHECK-LABEL: test_lane0_2xf64:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    fmov d1, #1.00000000
52; CHECK-NEXT:    ptrue p0.d, vl1
53; CHECK-NEXT:    mov z0.d, p0/m, z1.d
54; CHECK-NEXT:    ret
55  %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
56  ret <vscale x 2 x double> %b
57}
58
; Insert the constant float 1.0 into lane 0 of a <vscale x 4 x float> vector.
; Expected code: fmov into s1, then a vl1-predicated merging vector mov.
59define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
60; CHECK-LABEL: test_lane0_4xf32:
61; CHECK:       // %bb.0:
62; CHECK-NEXT:    fmov s1, #1.00000000
63; CHECK-NEXT:    ptrue p0.s, vl1
64; CHECK-NEXT:    mov z0.s, p0/m, z1.s
65; CHECK-NEXT:    ret
66  %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
67  ret <vscale x 4 x float> %b
68}
69
; Insert the constant half 1.0 into lane 0 of a <vscale x 8 x half> vector.
; Expected code: fmov into h1, then a vl1-predicated merging vector mov.
70define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
71; CHECK-LABEL: test_lane0_8xf16:
72; CHECK:       // %bb.0:
73; CHECK-NEXT:    fmov h1, #1.00000000
74; CHECK-NEXT:    ptrue p0.h, vl1
75; CHECK-NEXT:    mov z0.h, p0/m, z1.h
76; CHECK-NEXT:    ret
77  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
78  ret <vscale x 8 x half> %b
79}
80
; Insert the bfloat argument %x into lane 0 of a <vscale x 8 x bfloat> vector.
; Expected code: %x is already in h1 (see the kill annotation), so only the vl1
; predicate and the merging vector mov from z1 are needed.
81define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
82; CHECK-LABEL: test_lane0_8xbf16:
83; CHECK:       // %bb.0:
84; CHECK-NEXT:    ptrue p0.h, vl1
85; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
86; CHECK-NEXT:    mov z0.h, p0/m, z1.h
87; CHECK-NEXT:    ret
88  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
89  ret <vscale x 8 x bfloat> %b
90}
91
92; Undefined lane insert: lane 4 is out of range (result is poison) unless vscale >= 3
; Insert the constant i64 30 at constant lane 4 of a <vscale x 2 x i64> vector.
; Expected code: a single-lane predicate is formed by comparing an index vector
; (0, 1, 2, ...) with a splat of the lane number, then a merging mov writes the
; value into the selected lane only.
93define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
94; CHECK-LABEL: test_lane4_2xi64:
95; CHECK:       // %bb.0:
96; CHECK-NEXT:    mov w8, #4 // =0x4
97; CHECK-NEXT:    index z1.d, #0, #1
98; CHECK-NEXT:    ptrue p0.d
99; CHECK-NEXT:    mov z2.d, x8
100; CHECK-NEXT:    mov w8, #30 // =0x1e
101; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
102; CHECK-NEXT:    mov z0.d, p0/m, x8
103; CHECK-NEXT:    ret
104  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
105  ret <vscale x 2 x i64> %b
106}
107
108; Undefined lane insert: lane 9 is out of range (result is poison) unless vscale >= 2
; Insert the constant half 1.0 at constant lane 9 of a <vscale x 8 x half>
; vector. Expected code: index-vector/splat compare builds the lane predicate,
; the constant is materialised in h1, and a merging mov writes it to that lane.
109define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
110; CHECK-LABEL: test_lane9_8xf16:
111; CHECK:       // %bb.0:
112; CHECK-NEXT:    mov w8, #9 // =0x9
113; CHECK-NEXT:    index z1.h, #0, #1
114; CHECK-NEXT:    ptrue p0.h
115; CHECK-NEXT:    mov z2.h, w8
116; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
117; CHECK-NEXT:    fmov h1, #1.00000000
118; CHECK-NEXT:    mov z0.h, p0/m, h1
119; CHECK-NEXT:    ret
120  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
121  ret <vscale x 8 x half> %b
122}
123
; Insert the bfloat argument %x at constant lane 9 of a <vscale x 8 x bfloat>
; vector (lane 9 is in range only when vscale >= 2). Expected code: the lane
; predicate comes from an index-vector/splat compare; %x is merged from h1.
124define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
125; CHECK-LABEL: test_lane9_8xbf16:
126; CHECK:       // %bb.0:
127; CHECK-NEXT:    mov w8, #9 // =0x9
128; CHECK-NEXT:    index z2.h, #0, #1
129; CHECK-NEXT:    ptrue p0.h
130; CHECK-NEXT:    mov z3.h, w8
131; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
132; CHECK-NEXT:    mov z0.h, p0/m, h1
133; CHECK-NEXT:    ret
134  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
135  ret <vscale x 8 x bfloat> %b
136}
137
; Insert the constant i8 30 at constant lane 1 of a <vscale x 16 x i8> vector.
; Unlike the lane-0 case, the expected code uses the general sequence: the lane
; predicate is built from an index-vector/splat compare before the merging mov.
138define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
139; CHECK-LABEL: test_lane1_16xi8:
140; CHECK:       // %bb.0:
141; CHECK-NEXT:    mov w8, #1 // =0x1
142; CHECK-NEXT:    index z1.b, #0, #1
143; CHECK-NEXT:    ptrue p0.b
144; CHECK-NEXT:    mov z2.b, w8
145; CHECK-NEXT:    mov w8, #30 // =0x1e
146; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
147; CHECK-NEXT:    mov z0.b, p0/m, w8
148; CHECK-NEXT:    ret
149  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
150  ret <vscale x 16 x i8> %b
151}
152
; Insert the constant i8 30 at the run-time lane %x of a <vscale x 16 x i8>
; vector. Expected code: the index argument (w0) is splatted and compared with
; an index vector to form the lane predicate, then a merging mov writes 30.
153define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
154; CHECK-LABEL: test_lanex_16xi8:
155; CHECK:       // %bb.0:
156; CHECK-NEXT:    index z1.b, #0, #1
157; CHECK-NEXT:    mov w8, w0
158; CHECK-NEXT:    ptrue p0.b
159; CHECK-NEXT:    mov z2.b, w8
160; CHECK-NEXT:    mov w8, #30 // =0x1e
161; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
162; CHECK-NEXT:    mov z0.b, p0/m, w8
163; CHECK-NEXT:    ret
164  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
165  ret <vscale x 16 x i8> %b
166}
167
168
169; Redundant lane insert
; Extract lane 2 of %a and insert it straight back into lane 2 of the same
; vector. The expected output is a bare ret, i.e. the pair folds away entirely.
170define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
171; CHECK-LABEL: extract_insert_4xi32:
172; CHECK:       // %bb.0:
173; CHECK-NEXT:    ret
174  %b = extractelement <vscale x 4 x i32> %a, i32 2
175  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
176  ret <vscale x 4 x i32> %c
177}
178
; Insert the i16 argument %a at constant lane 6 of an undef <vscale x 8 x i16>
; vector. Even though the base vector is undef, the expected code still builds
; the single-lane predicate and uses a merging mov from w0.
179define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
180; CHECK-LABEL: test_lane6_undef_8xi16:
181; CHECK:       // %bb.0:
182; CHECK-NEXT:    mov w8, #6 // =0x6
183; CHECK-NEXT:    index z0.h, #0, #1
184; CHECK-NEXT:    ptrue p0.h
185; CHECK-NEXT:    mov z1.h, w8
186; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
187; CHECK-NEXT:    mov z0.h, p0/m, w0
188; CHECK-NEXT:    ret
189  %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
190  ret <vscale x 8 x i16> %b
191}
192
; Insert the i8 argument %a into lane 0 of an undef <vscale x 16 x i8> vector.
; Expected code: a single fmov from w0 into s0; no predicate is required
; because the other lanes are undef.
193define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
194; CHECK-LABEL: test_lane0_undef_16xi8:
195; CHECK:       // %bb.0:
196; CHECK-NEXT:    fmov s0, w0
197; CHECK-NEXT:    ret
198  %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
199  ret <vscale x 16 x i8> %b
200}
201
; Extract lane 0 of %b and insert it into lane 0 of %a.
; Expected code: the element is moved to w8 with fmov from s1, then written to
; lane 0 of z0 through a vl1-predicated merging mov.
202define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
203; CHECK-LABEL: test_insert0_of_extract0_16xi8:
204; CHECK:       // %bb.0:
205; CHECK-NEXT:    fmov w8, s1
206; CHECK-NEXT:    ptrue p0.b, vl1
207; CHECK-NEXT:    mov z0.b, p0/m, w8
208; CHECK-NEXT:    ret
209  %c = extractelement <vscale x 16 x i8> %b, i32 0
210  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
211  ret <vscale x 16 x i8> %d
212}
213
; Extract lane 64 of %b and insert it into lane 64 of %a (lane 64 of a
; <vscale x 16 x i8> is in range only when vscale >= 5).
; Expected code: the extract uses whilels + lastb to read the element into w9;
; the insert uses the index-vector/splat compare and a merging mov.
214define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
215; CHECK-LABEL: test_insert64_of_extract64_16xi8:
216; CHECK:       // %bb.0:
217; CHECK-NEXT:    mov w8, #64 // =0x40
218; CHECK-NEXT:    whilels p0.b, xzr, x8
219; CHECK-NEXT:    mov z2.b, w8
220; CHECK-NEXT:    lastb w9, p0, z1.b
221; CHECK-NEXT:    index z1.b, #0, #1
222; CHECK-NEXT:    ptrue p0.b
223; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
224; CHECK-NEXT:    mov z0.b, p0/m, w9
225; CHECK-NEXT:    ret
226  %c = extractelement <vscale x 16 x i8> %b, i32 64
227  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
228  ret <vscale x 16 x i8> %d
229}
230
; Extract lane 1 of %b and insert it into lane 3 of %a (different lanes).
; Expected code: the element is read with umov from v1.b[1]; the insert uses
; the index-vector/splat compare and a merging mov.
231define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
232; CHECK-LABEL: test_insert3_of_extract1_16xi8:
233; CHECK:       // %bb.0:
234; CHECK-NEXT:    mov w8, #3 // =0x3
235; CHECK-NEXT:    index z2.b, #0, #1
236; CHECK-NEXT:    ptrue p0.b
237; CHECK-NEXT:    mov z3.b, w8
238; CHECK-NEXT:    umov w8, v1.b[1]
239; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z3.b
240; CHECK-NEXT:    mov z0.b, p0/m, w8
241; CHECK-NEXT:    ret
242  %c = extractelement <vscale x 16 x i8> %b, i32 1
243  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
244  ret <vscale x 16 x i8> %d
245}
246
; Insert the half argument %a into lane 0 of an undef <vscale x 8 x half>.
; Expected code: no instructions besides ret; h0 is simply treated as the low
; part of z0 (only a register kill annotation appears).
247define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
248; CHECK-LABEL: test_insert_into_undef_nxv8f16:
249; CHECK:       // %bb.0:
250; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
251; CHECK-NEXT:    ret
252  %b = insertelement <vscale x 8 x half> undef, half %a, i32 0
253  ret <vscale x 8 x half> %b
254}
255
; Insert the half argument %a into lane 0 of an undef <vscale x 4 x half>.
; Expected code: no instructions besides ret (register kill annotation only).
256define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
257; CHECK-LABEL: test_insert_into_undef_nxv4f16:
258; CHECK:       // %bb.0:
259; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
260; CHECK-NEXT:    ret
261  %b = insertelement <vscale x 4 x half> undef, half %a, i32 0
262  ret <vscale x 4 x half> %b
263}
264
; Insert the half argument %a into lane 0 of an undef <vscale x 2 x half>.
; Expected code: no instructions besides ret (register kill annotation only).
265define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
266; CHECK-LABEL: test_insert_into_undef_nxv2f16:
267; CHECK:       // %bb.0:
268; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
269; CHECK-NEXT:    ret
270  %b = insertelement <vscale x 2 x half> undef, half %a, i32 0
271  ret <vscale x 2 x half> %b
272}
273
; Insert the bfloat argument %a into lane 0 of an undef <vscale x 8 x bfloat>.
; Expected code: no instructions besides ret (register kill annotation only).
274define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
275; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
276; CHECK:       // %bb.0:
277; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
278; CHECK-NEXT:    ret
279  %b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
280  ret <vscale x 8 x bfloat> %b
281}
282
; Insert the bfloat argument %a into lane 0 of an undef <vscale x 4 x bfloat>.
; Expected code: no instructions besides ret (register kill annotation only).
283define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
284; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
285; CHECK:       // %bb.0:
286; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
287; CHECK-NEXT:    ret
288  %b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
289  ret <vscale x 4 x bfloat> %b
290}
291
; Insert the bfloat argument %a into lane 0 of an undef <vscale x 2 x bfloat>.
; Expected code: no instructions besides ret (register kill annotation only).
292define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
293; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
294; CHECK:       // %bb.0:
295; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
296; CHECK-NEXT:    ret
297  %b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
298  ret <vscale x 2 x bfloat> %b
299}
300
; Insert the float argument %a into lane 0 of an undef <vscale x 4 x float>.
; Expected code: no instructions besides ret; s0 is treated as the low part of
; z0 (register kill annotation only).
301define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
302; CHECK-LABEL: test_insert_into_undef_nxv4f32:
303; CHECK:       // %bb.0:
304; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
305; CHECK-NEXT:    ret
306  %b = insertelement <vscale x 4 x float> undef, float %a, i32 0
307  ret <vscale x 4 x float> %b
308}
309
; Insert the float argument %a into lane 0 of an undef <vscale x 2 x float>.
; Expected code: no instructions besides ret (register kill annotation only).
310define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
311; CHECK-LABEL: test_insert_into_undef_nxv2f32:
312; CHECK:       // %bb.0:
313; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
314; CHECK-NEXT:    ret
315  %b = insertelement <vscale x 2 x float> undef, float %a, i32 0
316  ret <vscale x 2 x float> %b
317}
318
; Insert the double argument %a into lane 0 of an undef <vscale x 2 x double>.
; Expected code: no instructions besides ret; d0 is treated as the low part of
; z0 (register kill annotation only).
319define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
320; CHECK-LABEL: test_insert_into_undef_nxv2f64:
321; CHECK:       // %bb.0:
322; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
323; CHECK-NEXT:    ret
324  %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
325  ret <vscale x 2 x double> %b
326}
327
328; Insert scalar at index
; Insert half %h at run-time index %idx into an undef <vscale x 2 x half>.
; Expected code: the lane predicate is computed on doubleword (.d) elements by
; comparing an index vector with a splat of x0, while the merging mov of h0
; writes a halfword (.h) element.
329define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
330; CHECK-LABEL: test_insert_with_index_nxv2f16:
331; CHECK:       // %bb.0:
332; CHECK-NEXT:    index z1.d, #0, #1
333; CHECK-NEXT:    mov z2.d, x0
334; CHECK-NEXT:    ptrue p0.d
335; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
336; CHECK-NEXT:    mov z0.h, p0/m, h0
337; CHECK-NEXT:    ret
338  %res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
339  ret <vscale x 2 x half> %res
340}
341
; Insert half %h at run-time index %idx into an undef <vscale x 4 x half>.
; Expected code: the lane predicate is computed on word (.s) elements from a
; splat of w0, while the merging mov of h0 writes a halfword (.h) element.
342define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
343; CHECK-LABEL: test_insert_with_index_nxv4f16:
344; CHECK:       // %bb.0:
345; CHECK-NEXT:    index z1.s, #0, #1
346; CHECK-NEXT:    mov z2.s, w0
347; CHECK-NEXT:    ptrue p0.s
348; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
349; CHECK-NEXT:    mov z0.h, p0/m, h0
350; CHECK-NEXT:    ret
351  %res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
352  ret <vscale x 4 x half> %res
353}
354
; Insert half %h at run-time index %idx into an undef <vscale x 8 x half>.
; Expected code: halfword index vector compared with a splat of w0 forms the
; lane predicate; a merging mov then writes h0 into that lane.
355define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
356; CHECK-LABEL: test_insert_with_index_nxv8f16:
357; CHECK:       // %bb.0:
358; CHECK-NEXT:    index z1.h, #0, #1
359; CHECK-NEXT:    mov z2.h, w0
360; CHECK-NEXT:    ptrue p0.h
361; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
362; CHECK-NEXT:    mov z0.h, p0/m, h0
363; CHECK-NEXT:    ret
364  %res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
365  ret <vscale x 8 x half> %res
366}
367
; Insert bfloat %h at run-time index %idx into an undef <vscale x 2 x bfloat>.
; Expected code: lane predicate computed on doubleword (.d) elements from a
; splat of x0; the merging mov of h0 writes a halfword (.h) element.
368define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
369; CHECK-LABEL: test_insert_with_index_nxv2bf16:
370; CHECK:       // %bb.0:
371; CHECK-NEXT:    index z1.d, #0, #1
372; CHECK-NEXT:    mov z2.d, x0
373; CHECK-NEXT:    ptrue p0.d
374; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
375; CHECK-NEXT:    mov z0.h, p0/m, h0
376; CHECK-NEXT:    ret
377  %res = insertelement <vscale x 2 x bfloat> undef, bfloat %h, i64 %idx
378  ret <vscale x 2 x bfloat> %res
379}
380
; Insert bfloat %h at run-time index %idx into an undef <vscale x 4 x bfloat>.
; Expected code: lane predicate computed on word (.s) elements from a splat of
; w0; the merging mov of h0 writes a halfword (.h) element.
381define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
382; CHECK-LABEL: test_insert_with_index_nxv4bf16:
383; CHECK:       // %bb.0:
384; CHECK-NEXT:    index z1.s, #0, #1
385; CHECK-NEXT:    mov z2.s, w0
386; CHECK-NEXT:    ptrue p0.s
387; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
388; CHECK-NEXT:    mov z0.h, p0/m, h0
389; CHECK-NEXT:    ret
390  %res = insertelement <vscale x 4 x bfloat> undef, bfloat %h, i64 %idx
391  ret <vscale x 4 x bfloat> %res
392}
393
; Insert bfloat %h at run-time index %idx into an undef <vscale x 8 x bfloat>.
; Expected code: halfword index vector compared with a splat of w0 forms the
; lane predicate; a merging mov then writes h0 into that lane.
394define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
395; CHECK-LABEL: test_insert_with_index_nxv8bf16:
396; CHECK:       // %bb.0:
397; CHECK-NEXT:    index z1.h, #0, #1
398; CHECK-NEXT:    mov z2.h, w0
399; CHECK-NEXT:    ptrue p0.h
400; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
401; CHECK-NEXT:    mov z0.h, p0/m, h0
402; CHECK-NEXT:    ret
403  %res = insertelement <vscale x 8 x bfloat> undef, bfloat %h, i64 %idx
404  ret <vscale x 8 x bfloat> %res
405}
406
; Insert float %f at run-time index %idx into an undef <vscale x 2 x float>.
; Expected code: lane predicate computed on doubleword (.d) elements from a
; splat of x0; the merging mov of s0 writes a word (.s) element.
407define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
408; CHECK-LABEL: test_insert_with_index_nxv2f32:
409; CHECK:       // %bb.0:
410; CHECK-NEXT:    index z1.d, #0, #1
411; CHECK-NEXT:    mov z2.d, x0
412; CHECK-NEXT:    ptrue p0.d
413; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
414; CHECK-NEXT:    mov z0.s, p0/m, s0
415; CHECK-NEXT:    ret
416  %res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
417  ret <vscale x 2 x float> %res
418}
419
; Insert float %f at run-time index %idx into an undef <vscale x 4 x float>.
; Expected code: word index vector compared with a splat of w0 forms the lane
; predicate; a merging mov then writes s0 into that lane.
420define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
421; CHECK-LABEL: test_insert_with_index_nxv4f32:
422; CHECK:       // %bb.0:
423; CHECK-NEXT:    index z1.s, #0, #1
424; CHECK-NEXT:    mov z2.s, w0
425; CHECK-NEXT:    ptrue p0.s
426; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
427; CHECK-NEXT:    mov z0.s, p0/m, s0
428; CHECK-NEXT:    ret
429  %res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
430  ret <vscale x 4 x float> %res
431}
432
; Insert double %d at run-time index %idx into an undef <vscale x 2 x double>.
; Expected code: doubleword index vector compared with a splat of x0 forms the
; lane predicate; a merging mov then writes d0 into that lane.
433define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
434; CHECK-LABEL: test_insert_with_index_nxv2f64:
435; CHECK:       // %bb.0:
436; CHECK-NEXT:    index z1.d, #0, #1
437; CHECK-NEXT:    mov z2.d, x0
438; CHECK-NEXT:    ptrue p0.d
439; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
440; CHECK-NEXT:    mov z0.d, p0/m, d0
441; CHECK-NEXT:    ret
442  %res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
443  ret <vscale x 2 x double> %res
444}
445
446; Predicate insert
; Insert the i1 argument %elt into constant lane 0 of a <vscale x 2 x i1>
; predicate. Expected code: the predicate is expanded to a 0/1 integer vector,
; %elt is merged into lane 0 under a vl1 predicate, and the vector is masked
; with #0x1 and compared against zero to turn it back into a predicate.
447define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
448; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
449; CHECK:       // %bb.0:
450; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
451; CHECK-NEXT:    ptrue p0.d, vl1
452; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
453; CHECK-NEXT:    mov z0.d, p0/m, x0
454; CHECK-NEXT:    ptrue p0.d
455; CHECK-NEXT:    and z0.d, z0.d, #0x1
456; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
457; CHECK-NEXT:    ret
458  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
459  ret <vscale x 2 x i1> %res
460}
461
; Insert the i1 argument %elt into constant lane 2 of a <vscale x 4 x i1>
; predicate. Expected code: lane predicate p2 from an index-vector/splat
; compare, predicate expanded to a 0/1 vector, %elt (w0) merged under p2, then
; masked with #0x1 and compared against zero to rebuild the predicate.
462define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
463; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
464; CHECK:       // %bb.0:
465; CHECK-NEXT:    mov w8, #2 // =0x2
466; CHECK-NEXT:    index z0.s, #0, #1
467; CHECK-NEXT:    ptrue p1.s
468; CHECK-NEXT:    mov z1.s, w8
469; CHECK-NEXT:    cmpeq p2.s, p1/z, z0.s, z1.s
470; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
471; CHECK-NEXT:    mov z0.s, p2/m, w0
472; CHECK-NEXT:    and z0.s, z0.s, #0x1
473; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
474; CHECK-NEXT:    ret
475  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
476  ret <vscale x 4 x i1> %res
477}
478
; Insert the constant i1 true at run-time lane %idx of a <vscale x 8 x i1>
; predicate (here the inserted value, not the index, is the immediate).
; Expected code: lane predicate from a splat of the index, predicate expanded
; to a 0/1 vector, constant 1 merged in, then masked and compared against zero.
479define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
480; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
481; CHECK:       // %bb.0:
482; CHECK-NEXT:    index z0.h, #0, #1
483; CHECK-NEXT:    mov w8, w0
484; CHECK-NEXT:    ptrue p1.h
485; CHECK-NEXT:    mov z1.h, w8
486; CHECK-NEXT:    mov w8, #1 // =0x1
487; CHECK-NEXT:    cmpeq p2.h, p1/z, z0.h, z1.h
488; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
489; CHECK-NEXT:    mov z0.h, p2/m, w8
490; CHECK-NEXT:    and z0.h, z0.h, #0x1
491; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
492; CHECK-NEXT:    ret
493  %res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
494  ret <vscale x 8 x i1> %res
495}
496
; Insert the constant i1 false at constant lane 4 of a <vscale x 16 x i1>
; predicate. Expected code: lane predicate from an index-vector/splat compare,
; predicate expanded to a 0/1 vector, zero (from wzr) merged into the lane,
; then masked with #0x1 and compared against zero to rebuild the predicate.
497define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
498; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
499; CHECK:       // %bb.0:
500; CHECK-NEXT:    mov w8, #4 // =0x4
501; CHECK-NEXT:    index z0.b, #0, #1
502; CHECK-NEXT:    ptrue p1.b
503; CHECK-NEXT:    mov z1.b, w8
504; CHECK-NEXT:    mov w8, wzr
505; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z1.b
506; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
507; CHECK-NEXT:    mov z0.b, p2/m, w8
508; CHECK-NEXT:    and z0.b, z0.b, #0x1
509; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
510; CHECK-NEXT:    ret
511  %res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
512  ret <vscale x 16 x i1> %res
513}
514
515
; Insert the i1 argument %elt at run-time lane %idx of a <vscale x 2 x i1>
; predicate. Expected code: the index (w1) is splatted as doublewords and
; compared with an index vector to form p2; the predicate is expanded to a 0/1
; vector, %elt (x0) is merged under p2, then masked and compared against zero.
516define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
517; CHECK-LABEL: test_predicate_insert_2xi1:
518; CHECK:       // %bb.0:
519; CHECK-NEXT:    index z0.d, #0, #1
520; CHECK-NEXT:    mov w8, w1
521; CHECK-NEXT:    ptrue p1.d
522; CHECK-NEXT:    mov z1.d, x8
523; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
524; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z1.d
525; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
526; CHECK-NEXT:    mov z0.d, p2/m, x0
527; CHECK-NEXT:    and z0.d, z0.d, #0x1
528; CHECK-NEXT:    cmpne p0.d, p1/z, z0.d, #0
529; CHECK-NEXT:    ret
530  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
531  ret <vscale x 2 x i1> %res
532}
533
; Insert the i1 argument %elt at run-time lane %idx of a <vscale x 4 x i1>
; predicate. Expected code: lane predicate p2 from a splat of the index (w1),
; predicate expanded to a 0/1 vector, %elt (w0) merged under p2, then masked
; with #0x1 and compared against zero to rebuild the predicate.
534define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
535; CHECK-LABEL: test_predicate_insert_4xi1:
536; CHECK:       // %bb.0:
537; CHECK-NEXT:    index z0.s, #0, #1
538; CHECK-NEXT:    mov w8, w1
539; CHECK-NEXT:    ptrue p1.s
540; CHECK-NEXT:    mov z1.s, w8
541; CHECK-NEXT:    cmpeq p2.s, p1/z, z0.s, z1.s
542; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
543; CHECK-NEXT:    mov z0.s, p2/m, w0
544; CHECK-NEXT:    and z0.s, z0.s, #0x1
545; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
546; CHECK-NEXT:    ret
547  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
548  ret <vscale x 4 x i1> %res
549}
; Insert the i1 argument %elt at run-time lane %idx of a <vscale x 8 x i1>
; predicate. Expected code: lane predicate p2 from a splat of the index (w1),
; predicate expanded to a 0/1 vector, %elt (w0) merged under p2, then masked
; with #0x1 and compared against zero to rebuild the predicate.
550define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
551; CHECK-LABEL: test_predicate_insert_8xi1:
552; CHECK:       // %bb.0:
553; CHECK-NEXT:    index z0.h, #0, #1
554; CHECK-NEXT:    mov w8, w1
555; CHECK-NEXT:    ptrue p1.h
556; CHECK-NEXT:    mov z1.h, w8
557; CHECK-NEXT:    cmpeq p2.h, p1/z, z0.h, z1.h
558; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
559; CHECK-NEXT:    mov z0.h, p2/m, w0
560; CHECK-NEXT:    and z0.h, z0.h, #0x1
561; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
562; CHECK-NEXT:    ret
563  %res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
564  ret <vscale x 8 x i1> %res
565}
566
; Insert the i1 argument %elt at run-time lane %idx of a <vscale x 16 x i1>
; predicate. Expected code: lane predicate p2 from a splat of the index (w1),
; predicate expanded to a 0/1 vector, %elt (w0) merged under p2, then masked
; with #0x1 and compared against zero to rebuild the predicate.
567define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
568; CHECK-LABEL: test_predicate_insert_16xi1:
569; CHECK:       // %bb.0:
570; CHECK-NEXT:    index z0.b, #0, #1
571; CHECK-NEXT:    mov w8, w1
572; CHECK-NEXT:    ptrue p1.b
573; CHECK-NEXT:    mov z1.b, w8
574; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z1.b
575; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
576; CHECK-NEXT:    mov z0.b, p2/m, w0
577; CHECK-NEXT:    and z0.b, z0.b, #0x1
578; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
579; CHECK-NEXT:    ret
580  %res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
581  ret <vscale x 16 x i1> %res
582}
583
; Insert the i1 argument %elt at run-time lane %idx of a <vscale x 32 x i1>
; predicate, which is passed and returned in two predicate registers (p0, p1).
; Expected code goes through the stack: both halves are expanded to 0/1 byte
; vectors and stored to a two-vector stack slot, the index is clamped to
; (rdvl #2) - 1 with an unsigned cmp/csel, the element is stored with strb,
; and both halves are reloaded, masked with #0x1 and compared against zero.
; The function is marked uwtable, and the expected output includes the CFI
; directives for the x29 spill and the scalable stack adjustment.
584define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
585; CHECK-LABEL: test_predicate_insert_32xi1:
586; CHECK:       // %bb.0:
587; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
588; CHECK-NEXT:    .cfi_def_cfa_offset 16
589; CHECK-NEXT:    .cfi_offset w29, -16
590; CHECK-NEXT:    addvl sp, sp, #-2
591; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
592; CHECK-NEXT:    rdvl x8, #2
593; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
594; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
595; CHECK-NEXT:    sub x8, x8, #1
596; CHECK-NEXT:    mov w9, w1
597; CHECK-NEXT:    ptrue p1.b
598; CHECK-NEXT:    cmp x9, x8
599; CHECK-NEXT:    csel x8, x9, x8, lo
600; CHECK-NEXT:    mov x9, sp
601; CHECK-NEXT:    st1b { z0.b }, p1, [sp, #1, mul vl]
602; CHECK-NEXT:    st1b { z1.b }, p1, [sp]
603; CHECK-NEXT:    strb w0, [x9, x8]
604; CHECK-NEXT:    ld1b { z0.b }, p1/z, [sp]
605; CHECK-NEXT:    ld1b { z1.b }, p1/z, [sp, #1, mul vl]
606; CHECK-NEXT:    and z0.b, z0.b, #0x1
607; CHECK-NEXT:    and z1.b, z1.b, #0x1
608; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
609; CHECK-NEXT:    cmpne p1.b, p1/z, z1.b, #0
610; CHECK-NEXT:    addvl sp, sp, #2
611; CHECK-NEXT:    .cfi_def_cfa wsp, 16
612; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
613; CHECK-NEXT:    .cfi_def_cfa_offset 0
614; CHECK-NEXT:    .cfi_restore w29
615; CHECK-NEXT:    ret
616  %res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
617  ret <vscale x 32 x i1> %res
618}
619