; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Code generation test for the llvm.vector.reduce.mul intrinsic on aarch64.
; llc is run twice over this file: once without -global-isel (assertions with
; the SD-suffixed prefix) and once with -global-isel (assertions with the
; GI-suffixed prefix). Assertions using the bare prefix apply to both runs.
; stderr is merged into the checked output by the 2>&1 in each RUN line.
;
; NOTE(review): the directive below is spelled with an underscore, which is not
; one of the prefixes passed via --check-prefixes above, so it looks like
; FileCheck never evaluates it. The -global-isel assertions for mulv_v3i64
; further down also differ from the ones for the other run, which suggests the
; fallback no longer happens. Confirm with llc before either correcting the
; spelling or deleting the line.
; CHECK_GI: warning: Instruction selection used fallback path for mulv_v3i64

; Declarations of every reduction intrinsic overload called below, plus the
; v3i8 overload (all element widths from i8 to i128).
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v3i16(<3 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v3i32(<3 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>)

; ---- i8 element reductions ----

; Multiply-reduces <2 x i8>. Both runs share one set of assertions: the two
; lanes are moved to w8/w9 and combined with a single scalar mul.
define i8 @mulv_v2i8(<2 x i8> %a) {
; CHECK-LABEL: mulv_v2i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; Multiply-reduces <3 x i8>. The shared assertions expect two scalar muls over
; w0, w1 and w2 with no vector instructions.
define i8 @mulv_v3i8(<3 x i8> %a) {
; CHECK-LABEL: mulv_v3i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w0, w1
; CHECK-NEXT: mul w0, w8, w2
; CHECK-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; Multiply-reduces <4 x i8>. The runs are checked separately: one expects a
; serial chain of muls, the -global-isel run expects lanes multiplied pairwise
; and the two partial products multiplied last.
define i8 @mulv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: mulv_v4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.h[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; Multiply-reduces <8 x i8>. Both runs extract all eight byte lanes with umov
; and multiply them as scalars; only the order of the multiplies differs.
define i8 @mulv_v8i8(<8 x i8> %a) {
; CHECK-SD-LABEL: mulv_v8i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: umov w10, v0.b[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.b[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[4]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[5]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[6]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[7]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.b[0]
; CHECK-GI-NEXT: umov w9, v0.b[1]
; CHECK-GI-NEXT: umov w10, v0.b[2]
; CHECK-GI-NEXT: umov w11, v0.b[3]
; CHECK-GI-NEXT: umov w12, v0.b[4]
; CHECK-GI-NEXT: umov w13, v0.b[5]
; CHECK-GI-NEXT: umov w14, v0.b[6]
; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w10, w12, w13
; CHECK-GI-NEXT: mul w11, w14, w15
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; Multiply-reduces <16 x i8>. Both runs first multiply the high half by the
; low half as an 8b vector (the high half is obtained with ext in one run and
; a d-register mov in the other), then finish with scalar muls.
define i8 @mulv_v16i8(<16 x i8> %a) {
; CHECK-SD-LABEL: mulv_v16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umov w8, v0.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: umov w10, v0.b[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.b[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[4]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[5]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[6]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[7]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: umov w8, v0.b[0]
; CHECK-GI-NEXT: umov w9, v0.b[1]
; CHECK-GI-NEXT: umov w10, v0.b[2]
; CHECK-GI-NEXT: umov w11, v0.b[3]
; CHECK-GI-NEXT: umov w12, v0.b[4]
; CHECK-GI-NEXT: umov w13, v0.b[5]
; CHECK-GI-NEXT: umov w14, v0.b[6]
; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w10, w12, w13
; CHECK-GI-NEXT: mul w11, w14, w15
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; Multiply-reduces <32 x i8> (passed in v0 and v1 per the assertions). One run
; combines the two registers with a single 16b vector mul before halving; the
; -global-isel run uses three 8b vector muls instead.
define i8 @mulv_v32i8(<32 x i8> %a) {
; CHECK-SD-LABEL: mulv_v32i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mul v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umov w8, v0.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: umov w10, v0.b[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.b[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[4]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[5]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: umov w10, v0.b[6]
; CHECK-SD-NEXT: mul w8, w8, w9
; CHECK-SD-NEXT: umov w9, v0.b[7]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v32i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: mov d3, v1.d[1]
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: mul v1.8b, v1.8b, v3.8b
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: umov w8, v0.b[0]
; CHECK-GI-NEXT: umov w9, v0.b[1]
; CHECK-GI-NEXT: umov w10, v0.b[2]
; CHECK-GI-NEXT: umov w11, v0.b[3]
; CHECK-GI-NEXT: umov w12, v0.b[4]
; CHECK-GI-NEXT: umov w13, v0.b[5]
; CHECK-GI-NEXT: umov w14, v0.b[6]
; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w10, w12, w13
; CHECK-GI-NEXT: mul w11, w14, w15
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; ---- i16 element reductions ----

; Multiply-reduces <2 x i16>. Shared assertions; the expected instruction
; sequence is the same as for mulv_v2i8.
define i16 @mulv_v2i16(<2 x i16> %a) {
; CHECK-LABEL: mulv_v2i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; Multiply-reduces <3 x i16>. Both runs extract halfword lanes 0-2 and use two
; scalar muls; they differ only in lane extraction order and operand order.
define i16 @mulv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: mulv_v3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: mul w0, w8, w10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w0, w8, w10
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; Multiply-reduces <4 x i16>. The expected sequences match those of
; mulv_v4i8 for the respective run.
define i16 @mulv_v4i16(<4 x i16> %a) {
; CHECK-SD-LABEL: mulv_v4i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.h[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v4i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; Multiply-reduces <8 x i16>. Both runs do one 4h vector mul of the two halves
; and then reduce the four remaining lanes with scalar muls.
define i16 @mulv_v8i16(<8 x i16> %a) {
; CHECK-SD-LABEL: mulv_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.h[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; Multiply-reduces <16 x i16> (v0 and v1 per the assertions). One run starts
; with an 8h vector mul of the two registers; the -global-isel run uses three
; 4h vector muls.
define i16 @mulv_v16i16(<16 x i16> %a) {
; CHECK-SD-LABEL: mulv_v16i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: mul w8, w9, w8
; CHECK-SD-NEXT: umov w9, v0.h[3]
; CHECK-SD-NEXT: mul w8, w8, w10
; CHECK-SD-NEXT: mul w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v16i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: mov d3, v1.d[1]
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
; CHECK-GI-NEXT: mul v1.4h, v1.4h, v3.4h
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v0.h[1]
; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; ---- i32 element reductions ----

; Multiply-reduces <2 x i32>. Shared assertions: both lanes are moved to
; general-purpose registers and multiplied once.
define i32 @mulv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: mulv_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; Multiply-reduces <3 x i32>. Shared assertions: the constant 1 is inserted
; into lane 3 of a copy of the input before the halves are multiplied as a 2s
; vector, followed by one scalar mul.
define i32 @mulv_v3i32(<3 x i32> %a) {
; CHECK-LABEL: mulv_v3i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: mov v1.s[3], w8
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.mul.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; Multiply-reduces <4 x i32>. The runs differ only in how the high half is
; obtained (ext versus a d-register mov) before the 2s vector mul.
define i32 @mulv_v4i32(<4 x i32> %a) {
; CHECK-SD-LABEL: mulv_v4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mul w0, w9, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: mul w0, w9, w8
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; Multiply-reduces <8 x i32> (v0 and v1 per the assertions). One run starts
; with a 4s vector mul; the -global-isel run uses three 2s vector muls.
define i32 @mulv_v8i32(<8 x i32> %a) {
; CHECK-SD-LABEL: mulv_v8i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mul w0, w9, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v8i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: mov d3, v1.d[1]
; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s
; CHECK-GI-NEXT: mul v1.2s, v1.2s, v3.2s
; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: mul w0, w9, w8
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; ---- i64 element reductions ----

; Multiply-reduces <2 x i64>. Shared assertions: both doubleword lanes are
; moved to x registers and multiplied once; no vector mul is expected.
define i64 @mulv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: mulv_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mul x0, x9, x8
; CHECK-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; Multiply-reduces <3 x i64>. Per the assertions the three elements are read
; from d0, d1 and d2 and combined with two scalar muls; the two runs multiply
; them in a different order.
define i64 @mulv_v3i64(<3 x i64> %a) {
; CHECK-SD-LABEL: mulv_v3i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov x8, d2
; CHECK-SD-NEXT: fmov x9, d0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: mul x8, x9, x8
; CHECK-SD-NEXT: fmov x9, d1
; CHECK-SD-NEXT: mul x0, x9, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: fmov x9, d2
; CHECK-GI-NEXT: mul x0, x8, x9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.mul.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; Multiply-reduces <4 x i64> (v0 and v1 per the assertions). Both runs use
; three scalar muls on extracted lanes; they pair the lanes differently.
define i64 @mulv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: mulv_v4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov x8, v1.d[1]
; CHECK-SD-NEXT: mov x9, v0.d[1]
; CHECK-SD-NEXT: fmov x10, d0
; CHECK-SD-NEXT: mul x8, x9, x8
; CHECK-SD-NEXT: fmov x9, d1
; CHECK-SD-NEXT: mul x9, x10, x9
; CHECK-SD-NEXT: mul x0, x9, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: fmov x10, d0
; CHECK-GI-NEXT: mov x9, v1.d[1]
; CHECK-GI-NEXT: mul x8, x10, x8
; CHECK-GI-NEXT: fmov x10, d1
; CHECK-GI-NEXT: mul x9, x10, x9
; CHECK-GI-NEXT: mul x0, x8, x9
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; ---- i128 element reduction ----

; Multiply-reduces <2 x i128>. Per the assertions the operands arrive in x0-x3
; and the result is returned in x0/x1, built from mul, umulh and madd; no
; vector instructions are expected in either run.
define i128 @mulv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: mulv_v2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: umulh x8, x0, x2
; CHECK-SD-NEXT: madd x8, x0, x3, x8
; CHECK-SD-NEXT: mul x0, x0, x2
; CHECK-SD-NEXT: madd x1, x1, x2, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mulv_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mul x9, x0, x3
; CHECK-GI-NEXT: mul x8, x0, x2
; CHECK-GI-NEXT: umulh x10, x0, x2
; CHECK-GI-NEXT: madd x9, x1, x2, x9
; CHECK-GI-NEXT: mov x0, x8
; CHECK-GI-NEXT: add x1, x9, x10
; CHECK-GI-NEXT: ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.mul.v2i128(<2 x i128> %a)
  ret i128 %arg1
}