; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s

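; These tests check that the @llvm.aarch64.sve.sel.x4.* intrinsics are lowered to the
; SME2 multi-vector SEL instruction in streaming mode, with the source and select
; operands copied into consecutive register tuples (z28-z31 and z24-z27 below).
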
; == 8 to 64-bit elements ==

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @sel_x4_i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1b { z27.b }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @sel_x4_i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @sel_x4_f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %unused, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @sel_x4_bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) nounwind {
; CHECK-LABEL: sel_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @sel_x4_i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @sel_x4_f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @sel_x4_i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @sel_x4_f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}


; == 8 to 64-bit elements ==
declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)