; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s

; == 8 to 64-bit elements ==

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @sel_x4_i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @sel_x4_i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @sel_x4_f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %unused, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @sel_x4_bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) nounwind {
; CHECK-LABEL: sel_x4_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @sel_x4_i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @sel_x4_f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @sel_x4_i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) nounwind {
; CHECK-LABEL: sel_x4_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @sel_x4_f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) nounwind {
; CHECK-LABEL: sel_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}


; == 8 to 64-bit elements ==
declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)