; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s

define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_16x4_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[1], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot90_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x half> %c to <2 x i32>
  %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot180_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i32> %c.dup to <4 x half>
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
  ret <4 x half> %res
}

define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_rot270_16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot90_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot180_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test_rot270_32x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2s, v1.2s, v2.2s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot90_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #90
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot180_16x8_lane_1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[1], #180
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.8h, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_rot270_16x8_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.8h, v1.8h, v2.h[0], #270
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <8 x half> %c to <4 x i32>
  %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %c.res = bitcast <4 x i32> %c.dup to <8 x half>
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
  ret <8 x half> %res
}

define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_32x4_lane_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.s[0], #0
; CHECK-NEXT:    ret
entry:
  %c.cast = bitcast <4 x float> %c to <2 x i64>
  %c.dup = shufflevector <2 x i64> %c.cast , <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %c.res = bitcast <2 x i64> %c.dup to <4 x float>
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c.res)
  ret <4 x float> %res
}

define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot90_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot180_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_rot270_32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #0
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot90_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #90
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot180_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #180
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_rot270_64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    ret
entry:
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4s, v1.4s, v2.4s, #90
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define <4 x half> @reassoc_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #180
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %d, %a
  ret <4 x half> %res
}

define <4 x half> @reassoc_c_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.4h, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c)
  %res = fadd fast <4 x half> %a, %d
  ret <4 x half> %res
}

define <2 x double> @reassoc_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> zeroinitializer, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

define <2 x double> @reassoc_c_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) {
; CHECK-LABEL: reassoc_c_f64x2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd v0.2d, v0.2d, v0.2d
; CHECK-NEXT:    fcmla v0.2d, v1.2d, v2.2d, #270
; CHECK-NEXT:    fcmla v0.2d, v2.2d, v3.2d, #270
; CHECK-NEXT:    ret
entry:
  %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %c, <2 x double> %g)
  %res = fadd fast <2 x double> %e, %d
  ret <2 x double> %res
}

define <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v3.2d, #0000000000000000
; CHECK-NEXT:    fcmla v3.4s, v1.4s, v2.4s, #0
; CHECK-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double>, <2 x double>, <2 x double>)