; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s

target triple="aarch64-linux-gnu"


; == Multi, multi (16-bit float) ==

define void @fdot_multi_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3) #0 {
; CHECK-LABEL: fdot_multi_za32_f16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z4.d
; CHECK-NEXT:    mov z7.d, z2.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z4.d, z3.d
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3)
  ret void
}

; Note: %zn7 is the ninth scalable-vector argument, so it does not fit in
; z0-z7 (z0 holds %unused) and is passed indirectly instead; the ld1h from
; [x1] below reloads it. The bf16 vgx4 test further down behaves the same way.
define void @fdot_multi_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
; CHECK-LABEL: fdot_multi_za32_f16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    ret
                                       <vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7) #0 {
  call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
                                                      <vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
                                                      <vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7)
  ret void
}


; == Multi, multi (16-bit bfloat) ==

define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3) #0 {
; CHECK-LABEL: bfdot_multi_za32_bf16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z4.d
; CHECK-NEXT:    mov z7.d, z2.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z4.d, z3.d
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3)
  ret void
}

define void @bfdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
; CHECK-LABEL: bfdot_multi_za32_bf16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z26.d, z7.d
; CHECK-NEXT:    mov z31.d, z4.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z25.d, z6.d
; CHECK-NEXT:    mov z30.d, z3.d
; CHECK-NEXT:    mov z24.d, z5.d
; CHECK-NEXT:    mov z29.d, z2.d
; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT:    mov z28.d, z1.d
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT:    ret
                                         <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7) #0 {
  call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
                                                       <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
                                                       <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7)
  ret void
}


; == Multi, single (16-bit float) ==

define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) #0 {
; CHECK-LABEL: fdot_single_za32_f16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  ret void
}

define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) #0 {
; CHECK-LABEL: fdot_single_za32_f16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  ret void
}


; == Multi, single (16-bit bfloat) ==

define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) #0 {
; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  ret void
}

define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) #0 {
; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  ret void
}


; == Multi, indexed (16-bit float) ==

define void @fdot_lane_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) #0 {
; CHECK-LABEL: fdot_lane_za32_f16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, i32 3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, i32 3)
  ret void
}

define void @fdot_lane_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) #0 {
; CHECK-LABEL: fdot_lane_za32_f16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z24.h - z27.h }, z5.h[3]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
                                                           <vscale x 8 x half> %zn4, i32 3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 %slice2, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
                                                           <vscale x 8 x half> %zn4, i32 3)
  ret void
}


; == Multi, indexed (16-bit bfloat) ==

define void @bfdot_lane_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) #0 {
; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, i32 3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, i32 3)
  ret void
}

define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) #0 {
; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z24.h - z27.h }, z5.h[3]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
                                                            <vscale x 8 x bfloat> %zn4, i32 3)
  %slice2 = add i32 %slice, 7
  call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 %slice2, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
                                                            <vscale x 8 x bfloat> %zn4, i32 3)
  ret void
}


attributes #0 = { nounwind "target-features"="+sme2" }


; == Multi, multi (16-bit float)

declare void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
                                                       <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)

; == Multi, multi (16-bit bfloat)

declare void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
                                                        <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

; == Multi, single (16-bit float)

declare void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)

; == Multi, single (16-bit bfloat)

declare void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

; == Multi, indexed (16-bit float)

declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)

; == Multi, indexed (16-bit bfloat)

declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)