; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16

; Codegen tests for the llvm.vector.reduce.fmul intrinsic on AArch64, covering
; half, float and double element types. The four invocations above exercise
; the default selector and -global-isel, each with and without +fullfp16.
; The expected output below is generated by the script named in the NOTE
; line; regenerate it with that script rather than editing it by hand.

; <2 x float> input (a 64-bit vector) reduced with a start value of 1.0.
define float @mul_HalfS(<2 x float> %bin.rdx) {
; CHECK-SD-LABEL: mul_HalfS:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: mul_HalfS:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmul s0, s0, s1
; CHECK-GI-NEXT:    ret
  %r = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx)
  ret float %r
}

; <4 x half> input reduced with a start value of 1.0. In the expected output
; for the runs without +fullfp16, every multiply is done in single precision
; with an fcvt round trip through half between steps. The -global-isel run
; without +fullfp16 additionally materialises the 1.0 start value (0x3c00)
; and multiplies by it.
define half @mul_HalfH(<4 x half> %bin.rdx) {
; CHECK-SD-NOFP16-LABEL: mul_HalfH:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s2, s1
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s0, s1, s0
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: mul_HalfH:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: mul_HalfH:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s0, s1, s0
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: mul_HalfH:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-FP16-NEXT:    fmul h1, h0, h1
; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, h2
; CHECK-GI-FP16-NEXT:    fmul h0, h1, h0
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx)
  ret half %r
}


; <8 x half> input reduced with a start value of 1.0. With +fullfp16 both
; selectors are expected to emit a chain of by-element half multiplies.
define half @mul_H(<8 x half> %bin.rdx) {
; CHECK-SD-NOFP16-LABEL: mul_H:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s2, s1
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fmul s0, s1, s0
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: mul_H:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[3]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[4]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[5]
; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[6]
; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[7]
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: mul_H:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fmul s0, s1, s0
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: mul_H:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    fmul h1, h0, v0.h[1]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[2]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[3]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[4]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[5]
; CHECK-GI-FP16-NEXT:    fmul h1, h1, v0.h[6]
; CHECK-GI-FP16-NEXT:    fmul h0, h1, v0.h[7]
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx)
  ret half %r
}

; <4 x float> input reduced with a start value of 1.0. All four
; configurations share one expected sequence.
define float @mul_S(<4 x float> %bin.rdx) {
; CHECK-LABEL: mul_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s1, s0, v0.s[1]
; CHECK-NEXT:    fmul s1, s1, v0.s[2]
; CHECK-NEXT:    fmul s0, s1, v0.s[3]
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx)
  ret float %r
}

; <2 x double> input reduced with a start value of 1.0. All four
; configurations share one expected sequence.
define double @mul_D(<2 x double> %bin.rdx) {
; CHECK-LABEL: mul_D:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, v0.d[1]
; CHECK-NEXT:    ret
  %r = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %bin.rdx)
  ret double %r
}

; <16 x half> input reduced with a start value of 1.0. The expected output
; reads the input from two vector registers, folding in the lanes of v0
; first and then the lanes of v1.
define half @mul_2H(<16 x half> %bin.rdx) {
; CHECK-SD-NOFP16-LABEL: mul_2H:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s3, s2
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fmul s0, s2, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h1
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fmul s0, s0, s1
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: mul_2H:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    fmul h2, h0, v0.h[1]
; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[2]
; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[3]
; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[4]
; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[5]
; CHECK-SD-FP16-NEXT:    fmul h2, h2, v0.h[6]
; CHECK-SD-FP16-NEXT:    fmul h0, h2, v0.h[7]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, h1
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[1]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[2]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[3]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[4]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[5]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[6]
; CHECK-SD-FP16-NEXT:    fmul h0, h0, v1.h[7]
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: mul_2H:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fmul s0, s2, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h1
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fmul s0, s0, s1
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: mul_2H:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    fmul h2, h0, v0.h[1]
; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[2]
; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[3]
; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[4]
; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[5]
; CHECK-GI-FP16-NEXT:    fmul h2, h2, v0.h[6]
; CHECK-GI-FP16-NEXT:    fmul h0, h2, v0.h[7]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[1]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[2]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[3]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[4]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[5]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[6]
; CHECK-GI-FP16-NEXT:    fmul h0, h0, v1.h[7]
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx)
  ret half %r
}

; <8 x float> input (two vector registers in the expected output) reduced
; with a start value of 1.0.
define float @mul_2S(<8 x float> %bin.rdx) {
; CHECK-LABEL: mul_2S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul s2, s0, v0.s[1]
; CHECK-NEXT:    fmul s2, s2, v0.s[2]
; CHECK-NEXT:    fmul s0, s2, v0.s[3]
; CHECK-NEXT:    fmul s0, s0, s1
; CHECK-NEXT:    fmul s0, s0, v1.s[1]
; CHECK-NEXT:    fmul s0, s0, v1.s[2]
; CHECK-NEXT:    fmul s0, s0, v1.s[3]
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx)
  ret float %r
}

; <4 x double> input (two vector registers in the expected output) reduced
; with a start value of 1.0.
define double @mul_2D(<4 x double> %bin.rdx) {
; CHECK-LABEL: mul_2D:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul d0, d0, v0.d[1]
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fmul d0, d0, v1.d[1]
; CHECK-NEXT:    ret
  %r = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %bin.rdx)
  ret double %r
}

; Added at least one test where the start value is not 1.0.
; <4 x float> input reduced with a start value of 42.0. In the expected
; output the start value (0x42280000) is materialised in s1 and multiplied
; with lane 0 before the remaining lanes are folded in.
define float @mul_S_init_42(<4 x float> %bin.rdx) {
; CHECK-LABEL: mul_S_init_42:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fmul s1, s1, s0
; CHECK-NEXT:    fmul s1, s1, v0.s[1]
; CHECK-NEXT:    fmul s1, s1, v0.s[2]
; CHECK-NEXT:    fmul s0, s1, v0.s[3]
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
  ret float %r
}

; Function Attrs: nounwind readnone
declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fmul.f16.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fmul.f16.v16f16(half, <16 x half>)
declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)