xref: /llvm-project/llvm/test/CodeGen/AArch64/extract-vector-elt.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for extract_v4i32_vector_insert
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const
7; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract
8; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
9
; Extracts an element of %a using an undef index; %c is unused.
; Per the expectations below, SelectionDAG (SD) returns lane 0 with a single
; fmov, while GlobalISel (GI) pushes the vector onto the stack and reloads the
; 8 bytes at the base of that 16-byte slot.
10define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
11; CHECK-SD-LABEL: extract_v2i64_undef_index:
12; CHECK-SD:       // %bb.0: // %entry
13; CHECK-SD-NEXT:    fmov x0, d0
14; CHECK-SD-NEXT:    ret
15;
16; CHECK-GI-LABEL: extract_v2i64_undef_index:
17; CHECK-GI:       // %bb.0: // %entry
18; CHECK-GI-NEXT:    str q0, [sp, #-16]!
19; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
20; CHECK-GI-NEXT:    ldr x0, [sp], #16
21; CHECK-GI-NEXT:    ret
22entry:
23  %d = extractelement <2 x i64> %a, i32 undef
24  ret i64 %d
25}
26
; Extracts lane %c from an undef vector; %a is unused.
; Both instruction selectors are expected to emit nothing but the return.
27define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
28; CHECK-LABEL: extract_v2i64_undef_vector:
29; CHECK:       // %bb.0: // %entry
30; CHECK-NEXT:    ret
31entry:
32  %d = extractelement <2 x i64> undef, i32 %c
33  ret i64 %d
34}
35
; Baseline case: extracts lane %c of %a where the index is a run-time value.
; Both selectors are expected to spill the vector to a 16-byte stack slot and
; load the element from it. The index is reduced to a single bit: SD inserts it
; into the slot address with bfi (#3, #1), GI masks it with #0x1 and uses a
; scaled register offset.
36define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) {
37; CHECK-SD-LABEL: extract_v2i64_opaque:
38; CHECK-SD:       // %bb.0: // %entry
39; CHECK-SD-NEXT:    sub sp, sp, #16
40; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
41; CHECK-SD-NEXT:    mov x8, sp
42; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
43; CHECK-SD-NEXT:    str q0, [sp]
44; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
45; CHECK-SD-NEXT:    ldr x0, [x8]
46; CHECK-SD-NEXT:    add sp, sp, #16
47; CHECK-SD-NEXT:    ret
48;
49; CHECK-GI-LABEL: extract_v2i64_opaque:
50; CHECK-GI:       // %bb.0: // %entry
51; CHECK-GI-NEXT:    sub sp, sp, #16
52; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
53; CHECK-GI-NEXT:    mov w9, w0
54; CHECK-GI-NEXT:    mov x8, sp
55; CHECK-GI-NEXT:    str q0, [sp]
56; CHECK-GI-NEXT:    and x9, x9, #0x1
57; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
58; CHECK-GI-NEXT:    add sp, sp, #16
59; CHECK-GI-NEXT:    ret
60entry:
61  %d = extractelement <2 x i64> %a, i32 %c
62  ret i64 %d
63}
64
; Extracts with the constant index 5, which is past the end of the two-element
; vector; %c is unused. Both selectors are expected to emit only the return.
65define i64 @extract_v2i64_oob(<2 x i64> %a, i32 %c) {
66; CHECK-LABEL: extract_v2i64_oob:
67; CHECK:       // %bb.0: // %entry
68; CHECK-NEXT:    ret
69entry:
70  %d = extractelement <2 x i64> %a, i32 5
71  ret i64 %d
72}
73
; Extracts lane %c (run-time index) from a frozen copy of %a.
; The freeze leaves no instruction of its own in the expected output: both
; selectors emit the same stack spill plus indexed load sequence that
; extract_v2i64_opaque produces for the unfrozen vector.
74define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) {
75; CHECK-SD-LABEL: extract_v2i64_freeze:
76; CHECK-SD:       // %bb.0: // %entry
77; CHECK-SD-NEXT:    sub sp, sp, #16
78; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
79; CHECK-SD-NEXT:    mov x8, sp
80; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
81; CHECK-SD-NEXT:    str q0, [sp]
82; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
83; CHECK-SD-NEXT:    ldr x0, [x8]
84; CHECK-SD-NEXT:    add sp, sp, #16
85; CHECK-SD-NEXT:    ret
86;
87; CHECK-GI-LABEL: extract_v2i64_freeze:
88; CHECK-GI:       // %bb.0: // %entry
89; CHECK-GI-NEXT:    sub sp, sp, #16
90; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
91; CHECK-GI-NEXT:    mov w9, w0
92; CHECK-GI-NEXT:    mov x8, sp
93; CHECK-GI-NEXT:    str q0, [sp]
94; CHECK-GI-NEXT:    and x9, x9, #0x1
95; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
96; CHECK-GI-NEXT:    add sp, sp, #16
97; CHECK-GI-NEXT:    ret
98entry:
99  %fvector = freeze <2 x i64> %a
100  %d = extractelement <2 x i64> %fvector, i32 %c
101  ret i64 %d
102}
103
; Inserts %element at run-time index %c and immediately extracts at the same
; index. Both selectors are expected to emit only the return, with no vector
; or stack traffic (presumably %element already sits in the return register,
; being the first integer argument).
104define i64 @extract_v2i64_extract_of_insert(<2 x i64> %a, i64 %element, i64 %c) {
105; CHECK-LABEL: extract_v2i64_extract_of_insert:
106; CHECK:       // %bb.0: // %entry
107; CHECK-NEXT:    ret
108entry:
109  %vector = insertelement <2 x i64> %a, i64 %element, i64 %c
110  %d = extractelement <2 x i64> %vector, i64 %c
111  ret i64 %d
112}
113
; Inserts %element at constant lane 0 but extracts constant lane 1.
; The insert does not appear in the expected output: lane 1 is read straight
; from v0. SD uses one lane move into x0; GI moves the lane to d0 first and
; then transfers it with fmov.
114define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %element) {
115; CHECK-SD-LABEL: extract_v2i64_extract_of_insert_different_const:
116; CHECK-SD:       // %bb.0: // %entry
117; CHECK-SD-NEXT:    mov x0, v0.d[1]
118; CHECK-SD-NEXT:    ret
119;
120; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const:
121; CHECK-GI:       // %bb.0: // %entry
122; CHECK-GI-NEXT:    mov d0, v0.d[1]
123; CHECK-GI-NEXT:    fmov x0, d0
124; CHECK-GI-NEXT:    ret
125entry:
126  %vector = insertelement <2 x i64> %a, i64 %element, i64 0
127  %d = extractelement <2 x i64> %vector, i64 1
128  ret i64 %d
129}
130
; Extracts constant lane 1 from the constant vector <42, 11>; %a and %c are
; unused. Both selectors are expected to fold this to the immediate 11.
131define i64 @extract_v2i64_extract_build_vector_const(<2 x i64> %a, i32 %c) {
132; CHECK-LABEL: extract_v2i64_extract_build_vector_const:
133; CHECK:       // %bb.0: // %entry
134; CHECK-NEXT:    mov w0, #11 // =0xb
135; CHECK-NEXT:    ret
136entry:
137  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 1
138  ret i64 %d
139}
140
; Extracts run-time lane %c from the constant vector <42, 11>; %a is unused.
; Both selectors are expected to load the constant from the constant pool
; (.LCPI8_0), spill it to a 16-byte stack slot and load the element selected
; by the low bit of %c.
141define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
142; CHECK-SD-LABEL: extract_v2i64_extract_build_vector_opaque:
143; CHECK-SD:       // %bb.0: // %entry
144; CHECK-SD-NEXT:    sub sp, sp, #16
145; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
146; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
147; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
148; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
149; CHECK-SD-NEXT:    mov x8, sp
150; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
151; CHECK-SD-NEXT:    str q0, [sp]
152; CHECK-SD-NEXT:    ldr x0, [x8]
153; CHECK-SD-NEXT:    add sp, sp, #16
154; CHECK-SD-NEXT:    ret
155;
156; CHECK-GI-LABEL: extract_v2i64_extract_build_vector_opaque:
157; CHECK-GI:       // %bb.0: // %entry
158; CHECK-GI-NEXT:    sub sp, sp, #16
159; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
160; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
161; CHECK-GI-NEXT:    mov x9, sp
162; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
163; CHECK-GI-NEXT:    mov w8, w0
164; CHECK-GI-NEXT:    and x8, x8, #0x1
165; CHECK-GI-NEXT:    str q0, [sp]
166; CHECK-GI-NEXT:    ldr x0, [x9, x8, lsl #3]
167; CHECK-GI-NEXT:    add sp, sp, #16
168; CHECK-GI-NEXT:    ret
169entry:
170  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 %c
171  ret i64 %d
172}
173
174
; Zero-extends <2 x i32> %a to <2 x i64> and extracts run-time lane %c.
; Both selectors are expected to widen the whole vector first (ushll) and then
; go through the stack: spill the widened vector and load the element selected
; by the low bit of %c.
175define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) {
176; CHECK-SD-LABEL: extract_v2i32_zext:
177; CHECK-SD:       // %bb.0: // %entry
178; CHECK-SD-NEXT:    sub sp, sp, #16
179; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
180; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
181; CHECK-SD-NEXT:    mov x8, sp
182; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
183; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
184; CHECK-SD-NEXT:    str q0, [sp]
185; CHECK-SD-NEXT:    ldr x0, [x8]
186; CHECK-SD-NEXT:    add sp, sp, #16
187; CHECK-SD-NEXT:    ret
188;
189; CHECK-GI-LABEL: extract_v2i32_zext:
190; CHECK-GI:       // %bb.0: // %entry
191; CHECK-GI-NEXT:    sub sp, sp, #16
192; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
193; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
194; CHECK-GI-NEXT:    mov w9, w0
195; CHECK-GI-NEXT:    mov x8, sp
196; CHECK-GI-NEXT:    and x9, x9, #0x1
197; CHECK-GI-NEXT:    str q0, [sp]
198; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
199; CHECK-GI-NEXT:    add sp, sp, #16
200; CHECK-GI-NEXT:    ret
201entry:
202  %zvector = zext <2 x i32> %a to <2 x i64>
203  %d = extractelement <2 x i64> %zvector, i32 %c
204  ret i64 %d
205}
206
; Converts <2 x double> %a to <2 x i64> with fptosi and extracts run-time
; lane %c. Both selectors are expected to convert the whole vector (fcvtzs),
; spill the result and load the element selected by the low bit of %c.
207define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) {
208; CHECK-SD-LABEL: extract_v2double_fptosi:
209; CHECK-SD:       // %bb.0: // %entry
210; CHECK-SD-NEXT:    sub sp, sp, #16
211; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
212; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
213; CHECK-SD-NEXT:    mov x8, sp
214; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
215; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
216; CHECK-SD-NEXT:    str q0, [sp]
217; CHECK-SD-NEXT:    ldr x0, [x8]
218; CHECK-SD-NEXT:    add sp, sp, #16
219; CHECK-SD-NEXT:    ret
220;
221; CHECK-GI-LABEL: extract_v2double_fptosi:
222; CHECK-GI:       // %bb.0: // %entry
223; CHECK-GI-NEXT:    sub sp, sp, #16
224; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
225; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
226; CHECK-GI-NEXT:    mov w9, w0
227; CHECK-GI-NEXT:    mov x8, sp
228; CHECK-GI-NEXT:    and x9, x9, #0x1
229; CHECK-GI-NEXT:    str q0, [sp]
230; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
231; CHECK-GI-NEXT:    add sp, sp, #16
232; CHECK-GI-NEXT:    ret
233entry:
234  %vector = fptosi <2 x double> %a to <2 x i64>
235  %d = extractelement <2 x i64> %vector, i32 %c
236  ret i64 %d
237}
238
; Negates <2 x double> %a and extracts run-time lane %c as a double.
; Both selectors are expected to negate the whole vector, spill it and load
; the selected element directly into d0.
239define double @extract_v2double_fneg(<2 x double> %a, i32 %c) {
240; CHECK-SD-LABEL: extract_v2double_fneg:
241; CHECK-SD:       // %bb.0: // %entry
242; CHECK-SD-NEXT:    sub sp, sp, #16
243; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
244; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
245; CHECK-SD-NEXT:    mov x8, sp
246; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
247; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
248; CHECK-SD-NEXT:    str q0, [sp]
249; CHECK-SD-NEXT:    ldr d0, [x8]
250; CHECK-SD-NEXT:    add sp, sp, #16
251; CHECK-SD-NEXT:    ret
252;
253; CHECK-GI-LABEL: extract_v2double_fneg:
254; CHECK-GI:       // %bb.0: // %entry
255; CHECK-GI-NEXT:    sub sp, sp, #16
256; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
257; CHECK-GI-NEXT:    fneg v0.2d, v0.2d
258; CHECK-GI-NEXT:    mov w9, w0
259; CHECK-GI-NEXT:    mov x8, sp
260; CHECK-GI-NEXT:    and x9, x9, #0x1
261; CHECK-GI-NEXT:    str q0, [sp]
262; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
263; CHECK-GI-NEXT:    add sp, sp, #16
264; CHECK-GI-NEXT:    ret
265entry:
266  %vector = fneg <2 x double> %a
267  %d = extractelement <2 x double> %vector, i32 %c
268  ret double %d
269}
270
; Adds the constant vector <42, 11, 17, 6> to %a and extracts run-time lane
; %c; %b is unused. Both selectors are expected to load the constant from the
; constant pool, do a full vector add, spill the sum and load the element
; selected by the low two bits of %c (4-byte elements, hence the shift by 2).
271define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) {
272; CHECK-SD-LABEL: extract_v4i32_add:
273; CHECK-SD:       // %bb.0: // %entry
274; CHECK-SD-NEXT:    sub sp, sp, #16
275; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
276; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
277; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
278; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
279; CHECK-SD-NEXT:    mov x8, sp
280; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
281; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
282; CHECK-SD-NEXT:    str q0, [sp]
283; CHECK-SD-NEXT:    ldr w0, [x8]
284; CHECK-SD-NEXT:    add sp, sp, #16
285; CHECK-SD-NEXT:    ret
286;
287; CHECK-GI-LABEL: extract_v4i32_add:
288; CHECK-GI:       // %bb.0: // %entry
289; CHECK-GI-NEXT:    sub sp, sp, #16
290; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
291; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
292; CHECK-GI-NEXT:    mov x9, sp
293; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
294; CHECK-GI-NEXT:    mov w8, w0
295; CHECK-GI-NEXT:    and x8, x8, #0x3
296; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
297; CHECK-GI-NEXT:    str q0, [sp]
298; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
299; CHECK-GI-NEXT:    add sp, sp, #16
300; CHECK-GI-NEXT:    ret
301entry:
302  %vector = add <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
303  %d = extractelement <4 x i32> %vector, i32 %c
304  ret i32 %d
305}
306
; Computes llvm.minimum of the <4 x float> vectors %a and %b and extracts
; run-time lane %c (despite the v4i32 in the name, the elements are floats).
; Both selectors are expected to emit a full-width fmin, spill the result and
; load the element selected by the low two bits of %c into s0.
; The intrinsic is spelled with its canonical overload suffix (v4f32).
307define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) {
308; CHECK-SD-LABEL: extract_v4i32_minimum:
309; CHECK-SD:       // %bb.0: // %entry
310; CHECK-SD-NEXT:    sub sp, sp, #16
311; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
312; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
313; CHECK-SD-NEXT:    mov x8, sp
314; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
315; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
316; CHECK-SD-NEXT:    str q0, [sp]
317; CHECK-SD-NEXT:    ldr s0, [x8]
318; CHECK-SD-NEXT:    add sp, sp, #16
319; CHECK-SD-NEXT:    ret
320;
321; CHECK-GI-LABEL: extract_v4i32_minimum:
322; CHECK-GI:       // %bb.0: // %entry
323; CHECK-GI-NEXT:    sub sp, sp, #16
324; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
325; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
326; CHECK-GI-NEXT:    mov w8, w0
327; CHECK-GI-NEXT:    mov x9, sp
328; CHECK-GI-NEXT:    and x8, x8, #0x3
329; CHECK-GI-NEXT:    str q0, [sp]
330; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
331; CHECK-GI-NEXT:    add sp, sp, #16
332; CHECK-GI-NEXT:    ret
333entry:
334  %vector = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
335  %d = extractelement <4 x float> %vector, i32 %c
336  ret float %d
337}
338
; Computes llvm.minimum of %a and the constant vector <42.0, 11.0, 17.0, 6.0>
; and extracts run-time lane %c; %b is unused. Both selectors are expected to
; load the constant from the constant pool, emit a full-width fmin, spill the
; result and load the element selected by the low two bits of %c.
; The intrinsic is spelled with its canonical overload suffix (v4f32).
339define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
340; CHECK-SD-LABEL: extract_v4i32_minimum_build_vector:
341; CHECK-SD:       // %bb.0: // %entry
342; CHECK-SD-NEXT:    sub sp, sp, #16
343; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
344; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
345; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
346; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
347; CHECK-SD-NEXT:    mov x8, sp
348; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
349; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
350; CHECK-SD-NEXT:    str q0, [sp]
351; CHECK-SD-NEXT:    ldr s0, [x8]
352; CHECK-SD-NEXT:    add sp, sp, #16
353; CHECK-SD-NEXT:    ret
354;
355; CHECK-GI-LABEL: extract_v4i32_minimum_build_vector:
356; CHECK-GI:       // %bb.0: // %entry
357; CHECK-GI-NEXT:    sub sp, sp, #16
358; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
359; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
360; CHECK-GI-NEXT:    mov x9, sp
361; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
362; CHECK-GI-NEXT:    mov w8, w0
363; CHECK-GI-NEXT:    and x8, x8, #0x3
364; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
365; CHECK-GI-NEXT:    str q0, [sp]
366; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
367; CHECK-GI-NEXT:    add sp, sp, #16
368; CHECK-GI-NEXT:    ret
369entry:
370  %vector = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
371  %d = extractelement <4 x float> %vector, i32 %c
372  ret float %d
373}
374
; Constant-index variant: llvm.minimum of %a and a constant vector, then
; extract of lane 1; %b and %c are unused. Both selectors are expected to keep
; the full-width fmin and pick the lane with a register move, without any
; stack traffic.
; The intrinsic is spelled with its canonical overload suffix (v4f32).
375define float @extract_v4i32_minimum_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
376; CHECK-LABEL: extract_v4i32_minimum_build_vector_const:
377; CHECK:       // %bb.0: // %entry
378; CHECK-NEXT:    adrp x8, .LCPI15_0
379; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
380; CHECK-NEXT:    fmin v0.4s, v0.4s, v1.4s
381; CHECK-NEXT:    mov s0, v0.s[1]
382; CHECK-NEXT:    ret
383entry:
384  %vector = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
385  %d = extractelement <4 x float> %vector, i32 1
386  ret float %d
387}
388
; Applies llvm.copysign to %a with the sign taken from the constant vector
; <42.0, 11.0, 17.0, 6.0>, then extracts run-time lane %c; %b is unused.
; SD is expected to load the constant and merge with bif under the mask built
; by mvni; GI only ANDs %a with that mask and never loads the constant. Both
; then spill the result and load the lane selected by the low two bits of %c.
; The intrinsic is spelled with its canonical overload suffix (v4f32).
389define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
390; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector:
391; CHECK-SD:       // %bb.0: // %entry
392; CHECK-SD-NEXT:    sub sp, sp, #16
393; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
394; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
395; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
396; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
397; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
398; CHECK-SD-NEXT:    mov x8, sp
399; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
400; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
401; CHECK-SD-NEXT:    str q0, [sp]
402; CHECK-SD-NEXT:    ldr s0, [x8]
403; CHECK-SD-NEXT:    add sp, sp, #16
404; CHECK-SD-NEXT:    ret
405;
406; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector:
407; CHECK-GI:       // %bb.0: // %entry
408; CHECK-GI-NEXT:    sub sp, sp, #16
409; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
410; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
411; CHECK-GI-NEXT:    mov w8, w0
412; CHECK-GI-NEXT:    mov x9, sp
413; CHECK-GI-NEXT:    and x8, x8, #0x3
414; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
415; CHECK-GI-NEXT:    str q0, [sp]
416; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
417; CHECK-GI-NEXT:    add sp, sp, #16
418; CHECK-GI-NEXT:    ret
419entry:
420  %vector = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
421  %d = extractelement <4 x float> %vector, i32 %c
422  ret float %d
423}
424
; Constant-index variant: llvm.copysign of %a with a constant sign vector,
; then extract of lane 2; %b and %c are unused. Both selectors are expected to
; keep the full-width vector operation (bif with the loaded constant for SD,
; a plain AND with the mvni mask for GI) and pick the lane with a register
; move instead of going through the stack.
; The intrinsic is spelled with its canonical overload suffix (v4f32).
425define float @extract_v4i32_copysign_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
426; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector_const:
427; CHECK-SD:       // %bb.0: // %entry
428; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
429; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
430; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
431; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
432; CHECK-SD-NEXT:    mov s0, v0.s[2]
433; CHECK-SD-NEXT:    ret
434;
435; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector_const:
436; CHECK-GI:       // %bb.0: // %entry
437; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
438; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
439; CHECK-GI-NEXT:    mov s0, v0.s[2]
440; CHECK-GI-NEXT:    ret
441entry:
442  %vector = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
443  %d = extractelement <4 x float> %vector, i32 2
444  ret float %d
445}
446
447
; Compares %a (signed <=) against the constant vector <42, 11, 17, 6>,
; zero-extends the <4 x i1> result to <4 x i32> and extracts run-time lane %c;
; %b is unused. Both selectors are expected to emit cmge with the constant as
; the first source operand, mask the all-ones lanes down to 1 with a splat of
; 1, spill the result and load the lane selected by the low two bits of %c.
448define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) {
449; CHECK-SD-LABEL: extract_v4i32_icmp:
450; CHECK-SD:       // %bb.0: // %entry
451; CHECK-SD-NEXT:    sub sp, sp, #16
452; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
453; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
454; CHECK-SD-NEXT:    movi v2.4s, #1
455; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
456; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
457; CHECK-SD-NEXT:    mov x8, sp
458; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
459; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
460; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
461; CHECK-SD-NEXT:    str q0, [sp]
462; CHECK-SD-NEXT:    ldr w0, [x8]
463; CHECK-SD-NEXT:    add sp, sp, #16
464; CHECK-SD-NEXT:    ret
465;
466; CHECK-GI-LABEL: extract_v4i32_icmp:
467; CHECK-GI:       // %bb.0: // %entry
468; CHECK-GI-NEXT:    sub sp, sp, #16
469; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
470; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
471; CHECK-GI-NEXT:    movi v2.4s, #1
472; CHECK-GI-NEXT:    mov x9, sp
473; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
474; CHECK-GI-NEXT:    mov w8, w0
475; CHECK-GI-NEXT:    and x8, x8, #0x3
476; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
477; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
478; CHECK-GI-NEXT:    str q0, [sp]
479; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
480; CHECK-GI-NEXT:    add sp, sp, #16
481; CHECK-GI-NEXT:    ret
482entry:
483  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
484  %zvector = zext <4 x i1> %vector to <4 x i32>
485  %d = extractelement <4 x i32> %zvector, i32 %c
486  ret i32 %d
487}
488
; Constant-index variant of the icmp/zext case: extracts lane 2; %b and %c are
; unused. Both selectors are expected to keep the full-width compare and mask
; and then read lane 2 from the register. SD moves the lane straight to w0;
; GI moves it to s0 first and transfers it with fmov.
489define i32 @extract_v4i32_icmp_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
490; CHECK-SD-LABEL: extract_v4i32_icmp_const:
491; CHECK-SD:       // %bb.0: // %entry
492; CHECK-SD-NEXT:    adrp x8, .LCPI19_0
493; CHECK-SD-NEXT:    movi v2.4s, #1
494; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
495; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
496; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
497; CHECK-SD-NEXT:    mov w0, v0.s[2]
498; CHECK-SD-NEXT:    ret
499;
500; CHECK-GI-LABEL: extract_v4i32_icmp_const:
501; CHECK-GI:       // %bb.0: // %entry
502; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
503; CHECK-GI-NEXT:    movi v2.4s, #1
504; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
505; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
506; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
507; CHECK-GI-NEXT:    mov s0, v0.s[2]
508; CHECK-GI-NEXT:    fmov w0, s0
509; CHECK-GI-NEXT:    ret
510entry:
511  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
512  %zvector = zext <4 x i1> %vector to <4 x i32>
513  %d = extractelement <4 x i32> %zvector, i32 2
514  ret i32 %d
515}
516
; Performs an unordered (uno) compare of %a against a splat of 1.0,
; zero-extends the <4 x i1> result and extracts run-time lane %c; %b is unused.
; SD is expected to reduce the compare to a self-compare of %a (fcmeq v0, v0)
; inverted through bic. GI materializes the 1.0 splat and combines fcmge and
; fcmgt with orr before the same bic. Both then spill the result and load the
; lane selected by the low two bits of %c.
517define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) {
518; CHECK-SD-LABEL: extract_v4float_fcmp:
519; CHECK-SD:       // %bb.0: // %entry
520; CHECK-SD-NEXT:    sub sp, sp, #16
521; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
522; CHECK-SD-NEXT:    movi v1.4s, #1
523; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
524; CHECK-SD-NEXT:    mov x8, sp
525; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
526; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
527; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
528; CHECK-SD-NEXT:    str q0, [sp]
529; CHECK-SD-NEXT:    ldr w0, [x8]
530; CHECK-SD-NEXT:    add sp, sp, #16
531; CHECK-SD-NEXT:    ret
532;
533; CHECK-GI-LABEL: extract_v4float_fcmp:
534; CHECK-GI:       // %bb.0: // %entry
535; CHECK-GI-NEXT:    sub sp, sp, #16
536; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
537; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
538; CHECK-GI-NEXT:    mov w8, w0
539; CHECK-GI-NEXT:    mov x9, sp
540; CHECK-GI-NEXT:    and x8, x8, #0x3
541; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
542; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
543; CHECK-GI-NEXT:    movi v1.4s, #1
544; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
545; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
546; CHECK-GI-NEXT:    str q0, [sp]
547; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
548; CHECK-GI-NEXT:    add sp, sp, #16
549; CHECK-GI-NEXT:    ret
550entry:
551  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
552  %zvector = zext <4 x i1> %vector to <4 x i32>
553  %d = extractelement <4 x i32> %zvector, i32 %c
554  ret i32 %d
555}
556
; Constant-index variant of the fcmp uno/zext case: extracts lane 1; %b and %c
; are unused. Both selectors are expected to keep the full-width compare
; sequence and read lane 1 from the register instead of using the stack. SD
; moves the lane straight to w0; GI goes through s0 and fmov.
557define i32 @extract_v4float_fcmp_const(<4 x float> %a, <4 x float> %b, i32 %c) {
558; CHECK-SD-LABEL: extract_v4float_fcmp_const:
559; CHECK-SD:       // %bb.0: // %entry
560; CHECK-SD-NEXT:    movi v1.4s, #1
561; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
562; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
563; CHECK-SD-NEXT:    mov w0, v0.s[1]
564; CHECK-SD-NEXT:    ret
565;
566; CHECK-GI-LABEL: extract_v4float_fcmp_const:
567; CHECK-GI:       // %bb.0: // %entry
568; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
569; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
570; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
571; CHECK-GI-NEXT:    movi v1.4s, #1
572; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
573; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
574; CHECK-GI-NEXT:    mov s0, v0.s[1]
575; CHECK-GI-NEXT:    fmov w0, s0
576; CHECK-GI-NEXT:    ret
577entry:
578  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
579  %zvector = zext <4 x i1> %vector to <4 x i32>
580  %d = extractelement <4 x i32> %zvector, i32 1
581  ret i32 %d
582}
583
; Selects per lane between %a and the constant vector <42, 11, 17, 6> under
; the <4 x i1> mask %cond, then extracts run-time lane %c; %b is unused.
; In the expected output the mask is widened from 16-bit lanes of v2 (ushll)
; and its low bit is spread over each 32-bit lane (shl #31 followed by cmlt
; for SD or sshr for GI) before driving bif. The blended vector is spilled and
; the lane selected by the low two bits of %c is loaded.
584define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
585; CHECK-SD-LABEL: extract_v4i32_select:
586; CHECK-SD:       // %bb.0: // %entry
587; CHECK-SD-NEXT:    sub sp, sp, #16
588; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
589; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
590; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
591; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
592; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
593; CHECK-SD-NEXT:    mov x8, sp
594; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
595; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
596; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
597; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
598; CHECK-SD-NEXT:    str q0, [sp]
599; CHECK-SD-NEXT:    ldr w0, [x8]
600; CHECK-SD-NEXT:    add sp, sp, #16
601; CHECK-SD-NEXT:    ret
602;
603; CHECK-GI-LABEL: extract_v4i32_select:
604; CHECK-GI:       // %bb.0: // %entry
605; CHECK-GI-NEXT:    sub sp, sp, #16
606; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
607; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
608; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
609; CHECK-GI-NEXT:    mov x9, sp
610; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
611; CHECK-GI-NEXT:    mov w8, w0
612; CHECK-GI-NEXT:    and x8, x8, #0x3
613; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
614; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
615; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
616; CHECK-GI-NEXT:    str q0, [sp]
617; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
618; CHECK-GI-NEXT:    add sp, sp, #16
619; CHECK-GI-NEXT:    ret
620entry:
621  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
622  %d = extractelement <4 x i32> %vector, i32 %c
623  ret i32 %d
624}
625
; Constant-index variant of the vector select case: extracts lane 2; %b and
; %c are unused. Both selectors are expected to keep the full-width select and
; read lane 2 from the register. SD replaces the constant operand with a splat
; of 17 (the value of lane 2 in the constant vector) built by movi, whereas GI
; still loads the whole constant from the constant pool.
626define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
627; CHECK-SD-LABEL: extract_v4i32_select_const:
628; CHECK-SD:       // %bb.0: // %entry
629; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
630; CHECK-SD-NEXT:    movi v2.4s, #17
631; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
632; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
633; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
634; CHECK-SD-NEXT:    mov w0, v0.s[2]
635; CHECK-SD-NEXT:    ret
636;
637; CHECK-GI-LABEL: extract_v4i32_select_const:
638; CHECK-GI:       // %bb.0: // %entry
639; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
640; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
641; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
642; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
643; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
644; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
645; CHECK-GI-NEXT:    mov s0, v0.s[2]
646; CHECK-GI-NEXT:    fmov w0, s0
647; CHECK-GI-NEXT:    ret
648entry:
649  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
650  %d = extractelement <4 x i32> %vector, i32 2
651  ret i32 %d
652}
653
; Runs %a through a chain of unary vector operations (ceil, floor, fabs,
; fptosi, integer abs) and extracts run-time lane %c of the final <4 x i32>.
; Both selectors are expected to keep every step as a full-width vector
; instruction (frintp, frintm, fabs, fcvtzs, abs), spill the result and load
; the lane selected by the low two bits of %c.
; The float intrinsics are spelled with their canonical overload suffix (v4f32).
654define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) {
655; CHECK-SD-LABEL: extract_v4i32_abs:
656; CHECK-SD:       // %bb.0: // %entry
657; CHECK-SD-NEXT:    sub sp, sp, #16
658; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
659; CHECK-SD-NEXT:    frintp v0.4s, v0.4s
660; CHECK-SD-NEXT:    mov x8, sp
661; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
662; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
663; CHECK-SD-NEXT:    frintm v0.4s, v0.4s
664; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
665; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
666; CHECK-SD-NEXT:    abs v0.4s, v0.4s
667; CHECK-SD-NEXT:    str q0, [sp]
668; CHECK-SD-NEXT:    ldr w0, [x8]
669; CHECK-SD-NEXT:    add sp, sp, #16
670; CHECK-SD-NEXT:    ret
671;
672; CHECK-GI-LABEL: extract_v4i32_abs:
673; CHECK-GI:       // %bb.0: // %entry
674; CHECK-GI-NEXT:    sub sp, sp, #16
675; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
676; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
677; CHECK-GI-NEXT:    mov w9, w0
678; CHECK-GI-NEXT:    mov x8, sp
679; CHECK-GI-NEXT:    and x9, x9, #0x3
680; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
681; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
682; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
683; CHECK-GI-NEXT:    abs v0.4s, v0.4s
684; CHECK-GI-NEXT:    str q0, [sp]
685; CHECK-GI-NEXT:    ldr w0, [x8, x9, lsl #2]
686; CHECK-GI-NEXT:    add sp, sp, #16
687; CHECK-GI-NEXT:    ret
688entry:
689  %ceil = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
690  %floor = call <4 x float> @llvm.floor.v4f32(<4 x float> %ceil)
691  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %floor)
692  %abs = fptosi <4 x float> %fabs to <4 x i32>
693  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
694  %d = extractelement <4 x i32> %vector, i32 %c
695  ret i32 %d
696}
697
; Fully constant variant of the ceil/floor/fabs/fptosi/abs chain: the input is
; the constant vector <1.0, 4.0, 3.0, 2.0> and the index is the constant 1;
; %a and %c are unused. SD is expected to fold everything to the immediate 4.
; GI still loads the constant, executes the whole vector chain and reads
; lane 1 from the register.
; The float intrinsics are spelled with their canonical overload suffix (v4f32).
698define i32 @extract_v4i32_abs_const(<4 x float> %a, i32 %c) {
699; CHECK-SD-LABEL: extract_v4i32_abs_const:
700; CHECK-SD:       // %bb.0: // %entry
701; CHECK-SD-NEXT:    mov w0, #4 // =0x4
702; CHECK-SD-NEXT:    ret
703;
704; CHECK-GI-LABEL: extract_v4i32_abs_const:
705; CHECK-GI:       // %bb.0: // %entry
706; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
707; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI25_0]
708; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
709; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
710; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
711; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
712; CHECK-GI-NEXT:    abs v0.4s, v0.4s
713; CHECK-GI-NEXT:    mov s0, v0.s[1]
714; CHECK-GI-NEXT:    fmov w0, s0
715; CHECK-GI-NEXT:    ret
716entry:
717  %ceil = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
718  %floor = call <4 x float> @llvm.floor.v4f32(<4 x float> %ceil)
719  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %floor)
720  %abs = fptosi <4 x float> %fabs to <4 x i32>
721  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
722  %d = extractelement <4 x i32> %vector, i32 1
723  ret i32 %d
724}
725
; Half-constant variant of the ceil/floor/fabs/fptosi/abs chain: the input is
; the constant vector <1.0, 4.0, 3.0, 2.0> but the index %c is a run-time
; value; %a is unused. SD is expected to emit no arithmetic at all: it loads a
; vector from the constant pool, spills it and loads the selected lane. GI
; loads the constant and still executes every vector step before the spill.
; The float intrinsics are spelled with their canonical overload suffix (v4f32).
726define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) {
727; CHECK-SD-LABEL: extract_v4i32_abs_half_const:
728; CHECK-SD:       // %bb.0: // %entry
729; CHECK-SD-NEXT:    sub sp, sp, #16
730; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
731; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
732; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
733; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
734; CHECK-SD-NEXT:    mov x8, sp
735; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
736; CHECK-SD-NEXT:    str q0, [sp]
737; CHECK-SD-NEXT:    ldr w0, [x8]
738; CHECK-SD-NEXT:    add sp, sp, #16
739; CHECK-SD-NEXT:    ret
740;
741; CHECK-GI-LABEL: extract_v4i32_abs_half_const:
742; CHECK-GI:       // %bb.0: // %entry
743; CHECK-GI-NEXT:    sub sp, sp, #16
744; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
745; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
746; CHECK-GI-NEXT:    mov x9, sp
747; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
748; CHECK-GI-NEXT:    mov w8, w0
749; CHECK-GI-NEXT:    and x8, x8, #0x3
750; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
751; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
752; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
753; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
754; CHECK-GI-NEXT:    abs v0.4s, v0.4s
755; CHECK-GI-NEXT:    str q0, [sp]
756; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
757; CHECK-GI-NEXT:    add sp, sp, #16
758; CHECK-GI-NEXT:    ret
759entry:
760  %ceil = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
761  %floor = call <4 x float> @llvm.floor.v4f32(<4 x float> %ceil)
762  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %floor)
763  %abs = fptosi <4 x float> %fabs to <4 x i32>
764  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
765  %d = extractelement <4 x i32> %vector, i32 %c
766  ret i32 %d
767}
768
; Inserts the <2 x i32> subvector %b into %a at element offset 0 with
; llvm.vector.insert and extracts run-time lane %c of the result.
; The file header records that GlobalISel falls back for this function, so a
; single shared set of expectations covers both RUN lines. The expected code
; rebuilds the vector in v1 (%b in the low half, the upper half of %a in the
; high half), spills it and loads the lane selected by the low two bits of %c.
769define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) {
770; CHECK-LABEL: extract_v4i32_vector_insert:
771; CHECK:       // %bb.0: // %entry
772; CHECK-NEXT:    sub sp, sp, #16
773; CHECK-NEXT:    .cfi_def_cfa_offset 16
774; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
775; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
776; CHECK-NEXT:    mov x8, sp
777; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
778; CHECK-NEXT:    bfi x8, x0, #2, #2
779; CHECK-NEXT:    mov v1.d[1], v0.d[0]
780; CHECK-NEXT:    str q1, [sp]
781; CHECK-NEXT:    ldr w0, [x8]
782; CHECK-NEXT:    add sp, sp, #16
783; CHECK-NEXT:    ret
784entry:
785  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
786  %d = extractelement <4 x i32> %vector, i32 %c
787  ret i32 %d
788}
789
; Constant-index variant of the llvm.vector.insert case: extracts lane 1,
; which lies inside the inserted <2 x i32> subvector %b; %c is unused.
; The file header records that GlobalISel falls back for this function, so one
; shared set of expectations is used. The lane is expected to be read straight
; from v1 (the register holding %b) without materializing the insert.
790define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
791; CHECK-LABEL: extract_v4i32_vector_insert_const:
792; CHECK:       // %bb.0: // %entry
793; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
794; CHECK-NEXT:    mov w0, v1.s[1]
795; CHECK-NEXT:    ret
796entry:
797  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
798  %d = extractelement <4 x i32> %vector, i32 1
799  ret i32 %d
800}
801
; Calls llvm.vector.extract on %a at offset 0 and extracts run-time lane %c of
; the result; %b is unused. The file header records that GlobalISel falls back
; for this function, so one shared set of expectations is used: %a is spilled
; unchanged and the lane selected by the low two bits of %c is loaded.
; NOTE(review): the intrinsic name carries the suffix v2i32.v4i32, yet the call
; is typed as returning <4 x i32>, and the expected code treats the result as
; the full four-lane vector. This looks like a whole-vector extract rather than
; a real <2 x i32> subvector extract - confirm the intent, and regenerate the
; expectations if the result type is changed.
802define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) {
803; CHECK-LABEL: extract_v4i32_vector_extract:
804; CHECK:       // %bb.0: // %entry
805; CHECK-NEXT:    sub sp, sp, #16
806; CHECK-NEXT:    .cfi_def_cfa_offset 16
807; CHECK-NEXT:    mov x8, sp
808; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
809; CHECK-NEXT:    str q0, [sp]
810; CHECK-NEXT:    bfi x8, x0, #2, #2
811; CHECK-NEXT:    ldr w0, [x8]
812; CHECK-NEXT:    add sp, sp, #16
813; CHECK-NEXT:    ret
814entry:
815  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
816  %d = extractelement <4 x i32> %vector, i32 %c
817  ret i32 %d
818}
819
; Constant-index variant of the llvm.vector.extract case: lane 0 of the
; intrinsic's result is extracted.
; NOTE(review): the intrinsic name carries a .v2i32 result suffix, but the call
; is typed as returning <4 x i32> - confirm this is intended.
; The expected output is a single fmov from s0. A single expected sequence is
; shared by both llc invocations; the file header expects a GlobalISel fallback
; warning for this function.
820define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
821; CHECK-LABEL: extract_v4i32_vector_extract_const:
822; CHECK:       // %bb.0: // %entry
823; CHECK-NEXT:    fmov w0, s0
824; CHECK-NEXT:    ret
825entry:
826  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
827  %d = extractelement <4 x i32> %vector, i32 0
828  ret i32 %d
829}
830
; Loads a <4 x i32> from %arg and extracts the lane selected by the runtime
; index %c.
; Neither expected sequence contains a vector load: the index is masked with
; 0x3 and used as a scaled offset for a scalar 32-bit load from x1. The
; SelectionDAG and GlobalISel outputs differ only in how w0 is widened before
; the mask (implicit-def annotation vs. an explicit mov).
831define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
832; CHECK-SD-LABEL: extract_v4i32_load:
833; CHECK-SD:       // %bb.0: // %entry
834; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
835; CHECK-SD-NEXT:    and x8, x0, #0x3
836; CHECK-SD-NEXT:    ldr w0, [x1, x8, lsl #2]
837; CHECK-SD-NEXT:    ret
838;
839; CHECK-GI-LABEL: extract_v4i32_load:
840; CHECK-GI:       // %bb.0: // %entry
841; CHECK-GI-NEXT:    mov w8, w0
842; CHECK-GI-NEXT:    and x8, x8, #0x3
843; CHECK-GI-NEXT:    ldr w0, [x1, x8, lsl #2]
844; CHECK-GI-NEXT:    ret
845entry:
846  %vector = load  <4 x i32>, ptr %arg
847  %d = extractelement <4 x i32> %vector, i32 %c
848  ret i32 %d
849}
850
; Constant-index variant of the load case: lane 0 of a <4 x i32> loaded from
; %arg is extracted.
; Both llc invocations are expected to emit one scalar 32-bit load from [x1]
; instead of a vector load followed by a lane move.
851define i32 @extract_v4i32_load_const(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
852; CHECK-LABEL: extract_v4i32_load_const:
853; CHECK:       // %bb.0: // %entry
854; CHECK-NEXT:    ldr w0, [x1]
855; CHECK-NEXT:    ret
856entry:
857  %vector = load  <4 x i32>, ptr %arg
858  %d = extractelement <4 x i32> %vector, i32 0
859  ret i32 %d
860}
861
; Bitcasts <4 x i32> %a to <2 x double> and extracts the double selected by the
; runtime index %c.
; Both expected sequences store q0 to a 16-byte stack slot and load a d-register
; from it. SelectionDAG folds one index bit into bit 3 of the slot address with
; bfi; GlobalISel masks the index with 0x1 and uses it as an offset scaled by 8.
862define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) {
863; CHECK-SD-LABEL: extract_v4i32_bitcast:
864; CHECK-SD:       // %bb.0: // %entry
865; CHECK-SD-NEXT:    sub sp, sp, #16
866; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
867; CHECK-SD-NEXT:    mov x8, sp
868; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
869; CHECK-SD-NEXT:    str q0, [sp]
870; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
871; CHECK-SD-NEXT:    ldr d0, [x8]
872; CHECK-SD-NEXT:    add sp, sp, #16
873; CHECK-SD-NEXT:    ret
874;
875; CHECK-GI-LABEL: extract_v4i32_bitcast:
876; CHECK-GI:       // %bb.0: // %entry
877; CHECK-GI-NEXT:    sub sp, sp, #16
878; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
879; CHECK-GI-NEXT:    mov w9, w0
880; CHECK-GI-NEXT:    mov x8, sp
881; CHECK-GI-NEXT:    str q0, [sp]
882; CHECK-GI-NEXT:    and x9, x9, #0x1
883; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
884; CHECK-GI-NEXT:    add sp, sp, #16
885; CHECK-GI-NEXT:    ret
886entry:
887  %vector = bitcast <4 x i32> %a to <2 x double>
888  %d = extractelement <2 x double> %vector, i32 %c
889  ret double %d
890}
891
; Constant-index variant of the bitcast case: element 0 of %a reinterpreted as
; <2 x double> is returned.
; Both llc invocations are expected to emit no instruction other than ret; only
; a register-liveness annotation narrowing q0 to d0 appears.
892define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) {
893; CHECK-LABEL: extract_v4i32_bitcast_const:
894; CHECK:       // %bb.0: // %entry
895; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
896; CHECK-NEXT:    ret
897entry:
898  %vector = bitcast <4 x i32> %a to <2 x double>
899  %d = extractelement <2 x double> %vector, i32 0
900  ret double %d
901}
902
; Shuffles %a and %b with the mask <0, 2, 4, 3> and extracts the lane selected
; by the runtime index %c.
; Both expected sequences materialise the shuffled vector, store it to a 16-byte
; stack slot, and load the selected lane. SelectionDAG builds the shuffle with
; uzp1 plus a lane move; GlobalISel uses tbl with a mask loaded from the
; constant pool.
903define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
904; CHECK-SD-LABEL: extract_v4i32_shuffle:
905; CHECK-SD:       // %bb.0: // %entry
906; CHECK-SD-NEXT:    sub sp, sp, #16
907; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
908; CHECK-SD-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
909; CHECK-SD-NEXT:    mov x8, sp
910; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
911; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
912; CHECK-SD-NEXT:    mov v1.s[3], v0.s[3]
913; CHECK-SD-NEXT:    str q1, [sp]
914; CHECK-SD-NEXT:    ldr w0, [x8]
915; CHECK-SD-NEXT:    add sp, sp, #16
916; CHECK-SD-NEXT:    ret
917;
918; CHECK-GI-LABEL: extract_v4i32_shuffle:
919; CHECK-GI:       // %bb.0: // %entry
920; CHECK-GI-NEXT:    sub sp, sp, #16
921; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
922; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
923; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
924; CHECK-GI-NEXT:    mov x9, sp
925; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
926; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
927; CHECK-GI-NEXT:    mov w8, w0
928; CHECK-GI-NEXT:    and x8, x8, #0x3
929; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
930; CHECK-GI-NEXT:    str q0, [sp]
931; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
932; CHECK-GI-NEXT:    add sp, sp, #16
933; CHECK-GI-NEXT:    ret
934entry:
935  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
936  %d = extractelement <4 x i32> %vector, i32 %c
937  ret i32 %d
938}
939
; Constant-index variant of the shuffle case: lane 2 of the shuffle with mask
; <0, 2, 4, 3> is extracted. Mask entry 2 is 4, i.e. element 0 of %b.
; Both llc invocations are expected to emit a single fmov from s1 and no
; shuffle instruction.
940define i32 @extract_v4i32_shuffle_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
941; CHECK-LABEL: extract_v4i32_shuffle_const:
942; CHECK:       // %bb.0: // %entry
943; CHECK-NEXT:    fmov w0, s1
944; CHECK-NEXT:    ret
945entry:
946  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
947  %d = extractelement <4 x i32> %vector, i32 2
948  ret i32 %d
949}
950
; Extracts the lane selected by the runtime index %c from the constant vector
; splat (i32 11); the arguments %a and %b are unused.
; Both expected sequences materialise the splat with movi, store it to a 16-byte
; stack slot, and load the selected lane; the expected output does not reduce
; the result to the constant 11.
951define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) {
952; CHECK-SD-LABEL: extract_v4i32_splat:
953; CHECK-SD:       // %bb.0: // %entry
954; CHECK-SD-NEXT:    sub sp, sp, #16
955; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
956; CHECK-SD-NEXT:    movi v0.4s, #11
957; CHECK-SD-NEXT:    mov x8, sp
958; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
959; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
960; CHECK-SD-NEXT:    str q0, [sp]
961; CHECK-SD-NEXT:    ldr w0, [x8]
962; CHECK-SD-NEXT:    add sp, sp, #16
963; CHECK-SD-NEXT:    ret
964;
965; CHECK-GI-LABEL: extract_v4i32_splat:
966; CHECK-GI:       // %bb.0: // %entry
967; CHECK-GI-NEXT:    sub sp, sp, #16
968; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
969; CHECK-GI-NEXT:    movi v0.4s, #11
970; CHECK-GI-NEXT:    mov w8, w0
971; CHECK-GI-NEXT:    mov x9, sp
972; CHECK-GI-NEXT:    and x8, x8, #0x3
973; CHECK-GI-NEXT:    str q0, [sp]
974; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
975; CHECK-GI-NEXT:    add sp, sp, #16
976; CHECK-GI-NEXT:    ret
977entry:
978  %d = extractelement <4 x i32> splat (i32 11), i32 %c
979  ret i32 %d
980}
981
; Constant-index variant of the splat case: lane 0 of splat (i32 11) is
; extracted.
; Both llc invocations are expected to emit a single immediate move of 11 into
; w0.
982define i32 @extract_v4i32_splat_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
983; CHECK-LABEL: extract_v4i32_splat_const:
984; CHECK:       // %bb.0: // %entry
985; CHECK-NEXT:    mov w0, #11 // =0xb
986; CHECK-NEXT:    ret
987entry:
988  %d = extractelement <4 x i32> splat (i32 11), i32 0
989  ret i32 %d
990}
991
; Adds %a and %b with the vector-predicated intrinsic llvm.vp.add (operands
; %mask and %evl) and extracts the lane selected by the runtime index %c.
; Both expected sequences use a plain 4-lane add in which neither the mask nor
; the vector-length operand appears, then store the sum to a 16-byte stack slot
; and load the selected lane.
992define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) {
993; CHECK-SD-LABEL: extract_v4i32_vp_add:
994; CHECK-SD:       // %bb.0: // %entry
995; CHECK-SD-NEXT:    sub sp, sp, #16
996; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
997; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
998; CHECK-SD-NEXT:    mov x8, sp
999; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
1000; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
1001; CHECK-SD-NEXT:    str q0, [sp]
1002; CHECK-SD-NEXT:    ldr w0, [x8]
1003; CHECK-SD-NEXT:    add sp, sp, #16
1004; CHECK-SD-NEXT:    ret
1005;
1006; CHECK-GI-LABEL: extract_v4i32_vp_add:
1007; CHECK-GI:       // %bb.0: // %entry
1008; CHECK-GI-NEXT:    sub sp, sp, #16
1009; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
1010; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
1011; CHECK-GI-NEXT:    mov w8, w0
1012; CHECK-GI-NEXT:    mov x9, sp
1013; CHECK-GI-NEXT:    and x8, x8, #0x3
1014; CHECK-GI-NEXT:    str q0, [sp]
1015; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
1016; CHECK-GI-NEXT:    add sp, sp, #16
1017; CHECK-GI-NEXT:    ret
1018entry:
1019  %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
1020  %d = extractelement <4 x i32> %vector, i32 %c
1021  ret i32 %d
1022}
1023
; Constant-index variant of the llvm.vp.add case: lane 3 of the predicated sum
; of %a and %b is extracted.
; Both expected sequences perform the full 4-lane add first. SelectionDAG then
; moves lane 3 straight into w0; GlobalISel copies lane 3 into s0 and transfers
; it with fmov.
1024define i32 @extract_v4i32_vp_add_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) {
1025; CHECK-SD-LABEL: extract_v4i32_vp_add_const:
1026; CHECK-SD:       // %bb.0: // %entry
1027; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
1028; CHECK-SD-NEXT:    mov w0, v0.s[3]
1029; CHECK-SD-NEXT:    ret
1030;
1031; CHECK-GI-LABEL: extract_v4i32_vp_add_const:
1032; CHECK-GI:       // %bb.0: // %entry
1033; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
1034; CHECK-GI-NEXT:    mov s0, v0.s[3]
1035; CHECK-GI-NEXT:    fmov w0, s0
1036; CHECK-GI-NEXT:    ret
1037entry:
1038  %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
1039  %d = extractelement <4 x i32> %vector, i32 3
1040  ret i32 %d
1041}
1042
; Extracts lane 0 from a <2 x i32> value carried around a loop by a phi.
; The entry block splats %val, adds <1, 2>, and truncates to <2 x i32>. Each
; iteration reads lane 0, stores lane0 + 10 to %ptr[lane0], adds 16 to every
; lane, and repeats while lane 0 was unsigned-less-than %limit. The value
; returned is the last value stored.
; In both expected sequences the vector stays in v1 across the loop and lane 0
; is read with fmov. SelectionDAG splats the 32-bit w0 directly; GlobalISel
; splats the 64-bit x0, adds in 64-bit lanes, narrows with xtn, and reads lane 0
; twice per iteration.
1043define i32 @extract_v4i32_phi(i64 %val, i32  %limit, ptr %ptr) {
1044; CHECK-SD-LABEL: extract_v4i32_phi:
1045; CHECK-SD:       // %bb.0: // %entry
1046; CHECK-SD-NEXT:    dup v1.2s, w0
1047; CHECK-SD-NEXT:    adrp x8, .LCPI41_0
1048; CHECK-SD-NEXT:    movi v0.2s, #16
1049; CHECK-SD-NEXT:    ldr d2, [x8, :lo12:.LCPI41_0]
1050; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
1051; CHECK-SD-NEXT:  .LBB41_1: // %loop
1052; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
1053; CHECK-SD-NEXT:    fmov w8, s1
1054; CHECK-SD-NEXT:    add v1.2s, v1.2s, v0.2s
1055; CHECK-SD-NEXT:    cmp w8, w1
1056; CHECK-SD-NEXT:    add w0, w8, #10
1057; CHECK-SD-NEXT:    str w0, [x2, w8, sxtw #2]
1058; CHECK-SD-NEXT:    b.lo .LBB41_1
1059; CHECK-SD-NEXT:  // %bb.2: // %ret
1060; CHECK-SD-NEXT:    ret
1061;
1062; CHECK-GI-LABEL: extract_v4i32_phi:
1063; CHECK-GI:       // %bb.0: // %entry
1064; CHECK-GI-NEXT:    adrp x8, .LCPI41_0
1065; CHECK-GI-NEXT:    dup v0.2d, x0
1066; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI41_0]
1067; CHECK-GI-NEXT:    add v1.2d, v0.2d, v1.2d
1068; CHECK-GI-NEXT:    movi v0.2s, #16
1069; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
1070; CHECK-GI-NEXT:  .LBB41_1: // %loop
1071; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
1072; CHECK-GI-NEXT:    fmov w8, s1
1073; CHECK-GI-NEXT:    fmov w9, s1
1074; CHECK-GI-NEXT:    add v1.2s, v1.2s, v0.2s
1075; CHECK-GI-NEXT:    cmp w8, w1
1076; CHECK-GI-NEXT:    add w0, w9, #10
1077; CHECK-GI-NEXT:    str w0, [x2, w8, sxtw #2]
1078; CHECK-GI-NEXT:    b.lo .LBB41_1
1079; CHECK-GI-NEXT:  // %bb.2: // %ret
1080; CHECK-GI-NEXT:    ret
1081entry:
; Splat %val across a <2 x i64>, add <1, 2>, then truncate to <2 x i32>.
1082  %tempvector = insertelement <2 x i64> undef, i64 %val, i32 0
1083  %vector = shufflevector <2 x i64> %tempvector, <2 x i64> undef, <2 x i32> zeroinitializer
1084  %0 = add <2 x i64> %vector, <i64 1, i64 2>
1085  %1 = trunc <2 x i64> %0 to <2 x i32>
1086  br label %loop
1087
1088loop:
; Loop-carried vector; lane 0 is both the store index and the exit-test value.
1089  %2 = phi <2 x i32> [ %1, %entry ], [ %inc, %loop ]
1090  %elt = extractelement <2 x i32> %2, i32 0
1091  %end = icmp ult i32 %elt, %limit
1092  %3 = add i32 10, %elt
; The index is sign-extended for addressing although the exit test is unsigned.
1093  %4 = sext i32 %elt to i64
1094  %5 = getelementptr i32, ptr %ptr, i64 %4
1095  store i32 %3, ptr %5
1096  %inc = add <2 x i32> %2, <i32 16, i32 16>
1097  br i1 %end, label %loop, label %ret
1098
1099ret:
1100  ret i32 %3
1101}
1102