xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5
; The llc invocations above pass no -mtriple, so the target comes from this line.
6target triple = "aarch64-unknown-linux-gnu"
7
8;
9; extractelement
10;
11
12; Don't use SVE for 64-bit vectors.
; Returns element 3, the last lane, of a <4 x half> passed by value.
; The expected code is a single lane move out of v0 with no SVE instructions.
13define half @extractelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
14; CHECK-LABEL: extractelement_v4f16:
15; CHECK:       // %bb.0:
16; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
17; CHECK-NEXT:    mov h0, v0.h[3]
18; CHECK-NEXT:    ret
19    %r = extractelement <4 x half> %op1, i64 3
20    ret half %r
21}
22
23; Don't use SVE for 128-bit vectors.
; Returns element 7, the last lane, of an <8 x half> passed by value.
; The expected code is a single lane move out of v0 with no SVE instructions.
24define half @extractelement_v8f16(<8 x half> %op1) vscale_range(2,0) #0 {
25; CHECK-LABEL: extractelement_v8f16:
26; CHECK:       // %bb.0:
27; CHECK-NEXT:    mov h0, v0.h[7]
28; CHECK-NEXT:    ret
29    %r = extractelement <8 x half> %op1, i64 7
30    ret half %r
31}
32
; Loads a <16 x half> from %a and returns element 15, the last lane.
; vscale_range(2,0) sets a minimum vscale of 2 with no upper bound. The expected
; code loads all 16 halves under a vl16 predicate and reads the lane with an
; indexed mov.
33define half @extractelement_v16f16(ptr %a) vscale_range(2,0) #0 {
34; CHECK-LABEL: extractelement_v16f16:
35; CHECK:       // %bb.0:
36; CHECK-NEXT:    ptrue p0.h, vl16
37; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
38; CHECK-NEXT:    mov z0.h, z0.h[15]
39; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
40; CHECK-NEXT:    ret
41    %op1 = load <16 x half>, ptr %a
42    %r = extractelement <16 x half> %op1, i64 15
43    ret half %r
44}
45
; Loads a <32 x half> from %a and returns element 31, the last lane.
; There is no vscale_range here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each run:
;   * 256-bit run: only the upper 16 halves are loaded (offset of 16 elements)
;     and lane 15 of that half is read.
;   * 512-bit and 2048-bit runs: all 32 halves are loaded and lane 31 is read.
46define half @extractelement_v32f16(ptr %a) #0 {
47; VBITS_GE_256-LABEL: extractelement_v32f16:
48; VBITS_GE_256:       // %bb.0:
49; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
50; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
51; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
52; VBITS_GE_256-NEXT:    mov z0.h, z0.h[15]
53; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
54; VBITS_GE_256-NEXT:    ret
55;
56; VBITS_GE_512-LABEL: extractelement_v32f16:
57; VBITS_GE_512:       // %bb.0:
58; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
59; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
60; VBITS_GE_512-NEXT:    mov z0.h, z0.h[31]
61; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
62; VBITS_GE_512-NEXT:    ret
63    %op1 = load <32 x half>, ptr %a
64    %r = extractelement <32 x half> %op1, i64 31
65    ret half %r
66}
67
; Loads a <64 x half> from %a and returns element 63, the last lane.
; vscale_range(8,0) sets a minimum vscale of 8 with no upper bound. The expected
; code does not use an indexed mov: it puts 63 in a register, builds a predicate
; with whilels from that bound, and reads the element with lastb.
; NOTE(review): presumably lane 63 is beyond what the indexed mov form can
; encode for .h elements - confirm against the SVE DUP (indexed) encoding.
68define half @extractelement_v64f16(ptr %a) vscale_range(8,0) #0 {
69; CHECK-LABEL: extractelement_v64f16:
70; CHECK:       // %bb.0:
71; CHECK-NEXT:    ptrue p0.h, vl64
72; CHECK-NEXT:    mov w8, #63 // =0x3f
73; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
74; CHECK-NEXT:    whilels p0.h, xzr, x8
75; CHECK-NEXT:    lastb h0, p0, z0.h
76; CHECK-NEXT:    ret
77    %op1 = load <64 x half>, ptr %a
78    %r = extractelement <64 x half> %op1, i64 63
79    ret half %r
80}
81
; Loads a <128 x half> from %a and returns element 127, the last lane.
; vscale_range(16,0) sets a minimum vscale of 16 with no upper bound. The
; expected code puts 127 in a register, builds a predicate with whilels from
; that bound, and reads the element with lastb.
82define half @extractelement_v128f16(ptr %a) vscale_range(16,0) #0 {
83; CHECK-LABEL: extractelement_v128f16:
84; CHECK:       // %bb.0:
85; CHECK-NEXT:    ptrue p0.h, vl128
86; CHECK-NEXT:    mov w8, #127 // =0x7f
87; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
88; CHECK-NEXT:    whilels p0.h, xzr, x8
89; CHECK-NEXT:    lastb h0, p0, z0.h
90; CHECK-NEXT:    ret
91    %op1 = load <128 x half>, ptr %a
92    %r = extractelement <128 x half> %op1, i64 127
93    ret half %r
94}
95
96; Don't use SVE for 64-bit vectors.
; Returns element 1, the last lane, of a <2 x float> passed by value.
; The expected code is a single lane move out of v0 with no SVE instructions.
97define float @extractelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
98; CHECK-LABEL: extractelement_v2f32:
99; CHECK:       // %bb.0:
100; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
101; CHECK-NEXT:    mov s0, v0.s[1]
102; CHECK-NEXT:    ret
103    %r = extractelement <2 x float> %op1, i64 1
104    ret float %r
105}
106
107; Don't use SVE for 128-bit vectors.
; Returns element 3, the last lane, of a <4 x float> passed by value.
; The expected code is a single lane move out of v0 with no SVE instructions.
108define float @extractelement_v4f32(<4 x float> %op1) vscale_range(2,0) #0 {
109; CHECK-LABEL: extractelement_v4f32:
110; CHECK:       // %bb.0:
111; CHECK-NEXT:    mov s0, v0.s[3]
112; CHECK-NEXT:    ret
113    %r = extractelement <4 x float> %op1, i64 3
114    ret float %r
115}
116
; Loads an <8 x float> from %a and returns element 7, the last lane.
; vscale_range(2,0) sets a minimum vscale of 2 with no upper bound. The expected
; code loads all 8 floats under a vl8 predicate and reads the lane with an
; indexed mov.
117define float @extractelement_v8f32(ptr %a) vscale_range(2,0) #0 {
118; CHECK-LABEL: extractelement_v8f32:
119; CHECK:       // %bb.0:
120; CHECK-NEXT:    ptrue p0.s, vl8
121; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
122; CHECK-NEXT:    mov z0.s, z0.s[7]
123; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
124; CHECK-NEXT:    ret
125    %op1 = load <8 x float>, ptr %a
126    %r = extractelement <8 x float> %op1, i64 7
127    ret float %r
128}
129
; Loads a <16 x float> from %a and returns element 15, the last lane.
; There is no vscale_range here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each run:
;   * 256-bit run: only the upper 8 floats are loaded (offset of 8 elements)
;     and lane 7 of that half is read.
;   * 512-bit and 2048-bit runs: all 16 floats are loaded and lane 15 is read.
130define float @extractelement_v16f32(ptr %a) #0 {
131; VBITS_GE_256-LABEL: extractelement_v16f32:
132; VBITS_GE_256:       // %bb.0:
133; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
134; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
135; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
136; VBITS_GE_256-NEXT:    mov z0.s, z0.s[7]
137; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
138; VBITS_GE_256-NEXT:    ret
139;
140; VBITS_GE_512-LABEL: extractelement_v16f32:
141; VBITS_GE_512:       // %bb.0:
142; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
143; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
144; VBITS_GE_512-NEXT:    mov z0.s, z0.s[15]
145; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
146; VBITS_GE_512-NEXT:    ret
147    %op1 = load <16 x float>, ptr %a
148    %r = extractelement <16 x float> %op1, i64 15
149    ret float %r
150}
151
; Loads a <32 x float> from %a and returns element 31, the last lane.
; vscale_range(8,0) sets a minimum vscale of 8 with no upper bound. The expected
; code does not use an indexed mov: it puts 31 in a register, builds a predicate
; with whilels from that bound, and reads the element with lastb.
; NOTE(review): presumably lane 31 is beyond what the indexed mov form can
; encode for .s elements - confirm against the SVE DUP (indexed) encoding.
152define float @extractelement_v32f32(ptr %a) vscale_range(8,0) #0 {
153; CHECK-LABEL: extractelement_v32f32:
154; CHECK:       // %bb.0:
155; CHECK-NEXT:    ptrue p0.s, vl32
156; CHECK-NEXT:    mov w8, #31 // =0x1f
157; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
158; CHECK-NEXT:    whilels p0.s, xzr, x8
159; CHECK-NEXT:    lastb s0, p0, z0.s
160; CHECK-NEXT:    ret
161    %op1 = load <32 x float>, ptr %a
162    %r = extractelement <32 x float> %op1, i64 31
163    ret float %r
164}
165
; Loads a <64 x float> from %a and returns element 63, the last lane.
; vscale_range(16,0) sets a minimum vscale of 16 with no upper bound. The
; expected code puts 63 in a register, builds a predicate with whilels from
; that bound, and reads the element with lastb.
166define float @extractelement_v64f32(ptr %a) vscale_range(16,0) #0 {
167; CHECK-LABEL: extractelement_v64f32:
168; CHECK:       // %bb.0:
169; CHECK-NEXT:    ptrue p0.s, vl64
170; CHECK-NEXT:    mov w8, #63 // =0x3f
171; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
172; CHECK-NEXT:    whilels p0.s, xzr, x8
173; CHECK-NEXT:    lastb s0, p0, z0.s
174; CHECK-NEXT:    ret
175    %op1 = load <64 x float>, ptr %a
176    %r = extractelement <64 x float> %op1, i64 63
177    ret float %r
178}
179
180; Don't use SVE for 64-bit vectors.
; Returns element 0, the only lane, of a <1 x double> passed by value.
; The expected code is a bare ret: no instruction is needed for the extract.
181define double @extractelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
182; CHECK-LABEL: extractelement_v1f64:
183; CHECK:       // %bb.0:
184; CHECK-NEXT:    ret
185    %r = extractelement <1 x double> %op1, i64 0
186    ret double %r
187}
188
189; Don't use SVE for 128-bit vectors.
; Returns element 1, the last lane, of a <2 x double> passed by value.
; The expected code is a single lane move out of v0 with no SVE instructions.
190define double @extractelement_v2f64(<2 x double> %op1) vscale_range(2,0) #0 {
191; CHECK-LABEL: extractelement_v2f64:
192; CHECK:       // %bb.0:
193; CHECK-NEXT:    mov d0, v0.d[1]
194; CHECK-NEXT:    ret
195    %r = extractelement <2 x double> %op1, i64 1
196    ret double %r
197}
198
; Loads a <4 x double> from %a and returns element 3, the last lane.
; vscale_range(2,0) sets a minimum vscale of 2 with no upper bound. The expected
; code loads all 4 doubles under a vl4 predicate and reads the lane with an
; indexed mov.
199define double @extractelement_v4f64(ptr %a) vscale_range(2,0) #0 {
200; CHECK-LABEL: extractelement_v4f64:
201; CHECK:       // %bb.0:
202; CHECK-NEXT:    ptrue p0.d, vl4
203; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
204; CHECK-NEXT:    mov z0.d, z0.d[3]
205; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
206; CHECK-NEXT:    ret
207    %op1 = load <4 x double>, ptr %a
208    %r = extractelement <4 x double> %op1, i64 3
209    ret double %r
210}
211
; Loads an <8 x double> from %a and returns element 7, the last lane.
; There is no vscale_range here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each run:
;   * 256-bit run: only the upper 4 doubles are loaded (offset of 4 elements)
;     and lane 3 of that half is read.
;   * 512-bit and 2048-bit runs: all 8 doubles are loaded and lane 7 is read.
212define double @extractelement_v8f64(ptr %a) #0 {
213; VBITS_GE_256-LABEL: extractelement_v8f64:
214; VBITS_GE_256:       // %bb.0:
215; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
216; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
217; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
218; VBITS_GE_256-NEXT:    mov z0.d, z0.d[3]
219; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
220; VBITS_GE_256-NEXT:    ret
221;
222; VBITS_GE_512-LABEL: extractelement_v8f64:
223; VBITS_GE_512:       // %bb.0:
224; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
225; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
226; VBITS_GE_512-NEXT:    mov z0.d, z0.d[7]
227; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
228; VBITS_GE_512-NEXT:    ret
229    %op1 = load <8 x double>, ptr %a
230    %r = extractelement <8 x double> %op1, i64 7
231    ret double %r
232}
233
; Loads a <16 x double> from %a and returns element 15, the last lane.
; vscale_range(8,0) sets a minimum vscale of 8 with no upper bound. The expected
; code does not use an indexed mov: it puts 15 in a register, builds a predicate
; with whilels from that bound, and reads the element with lastb.
; NOTE(review): presumably lane 15 is beyond what the indexed mov form can
; encode for .d elements - confirm against the SVE DUP (indexed) encoding.
234define double @extractelement_v16f64(ptr %a) vscale_range(8,0) #0 {
235; CHECK-LABEL: extractelement_v16f64:
236; CHECK:       // %bb.0:
237; CHECK-NEXT:    ptrue p0.d, vl16
238; CHECK-NEXT:    mov w8, #15 // =0xf
239; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
240; CHECK-NEXT:    whilels p0.d, xzr, x8
241; CHECK-NEXT:    lastb d0, p0, z0.d
242; CHECK-NEXT:    ret
243    %op1 = load <16 x double>, ptr %a
244    %r = extractelement <16 x double> %op1, i64 15
245    ret double %r
246}
247
; Loads a <32 x double> from %a and returns element 31, the last lane.
; vscale_range(16,0) sets a minimum vscale of 16 with no upper bound. The
; expected code puts 31 in a register, builds a predicate with whilels from
; that bound, and reads the element with lastb.
248define double @extractelement_v32f64(ptr %a) vscale_range(16,0) #0 {
249; CHECK-LABEL: extractelement_v32f64:
250; CHECK:       // %bb.0:
251; CHECK-NEXT:    ptrue p0.d, vl32
252; CHECK-NEXT:    mov w8, #31 // =0x1f
253; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
254; CHECK-NEXT:    whilels p0.d, xzr, x8
255; CHECK-NEXT:    lastb d0, p0, z0.d
256; CHECK-NEXT:    ret
257    %op1 = load <32 x double>, ptr %a
258    %r = extractelement <32 x double> %op1, i64 31
259    ret double %r
260}
261
; Attribute group #0 is attached to every function above; it enables the SVE
; target feature.
262attributes #0 = { "target-features"="+sve" }
263