; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16

; Code generation tests for llvm.vector.reduce.fadd calls that carry no
; fast-math flags; the expected output adds the lanes in order.
; The four RUN lines cover the default selector (SD prefixes) and
; -global-isel (GI prefixes), each without and with +fullfp16.

; <2 x float>, start value -0.0: both selectors emit a single faddp.
define float @add_HalfS(<2 x float> %bin.rdx)  {
; CHECK-SD-LABEL: add_HalfS:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    faddp s0, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: add_HalfS:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    faddp s0, v0.2s
; CHECK-GI-NEXT:    ret
  %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
  ret float %r
}

; <4 x half>, start value -0.0. Without +fullfp16 every step is widened to
; f32 with fcvt and converted back to f16. The -global-isel run without
; +fullfp16 also materialises the start value (0x8000) and adds it first.
define half @add_HalfH(<4 x half> %bin.rdx)  {
; CHECK-SD-NOFP16-LABEL: add_HalfH:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s2, s1
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s0, s1, s0
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: add_HalfH:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
; CHECK-SD-FP16-NEXT:    fadd h1, h2, h1
; CHECK-SD-FP16-NEXT:    fadd h0, h1, h0
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: add_HalfH:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: add_HalfH:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-FP16-NEXT:    fadd h1, h0, h1
; CHECK-GI-FP16-NEXT:    mov h0, v0.h[3]
; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
; CHECK-GI-FP16-NEXT:    fadd h0, h1, h0
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx)
  ret half %r
}


; <8 x half>, start value -0.0: the full-register counterpart of add_HalfH.
define half @add_H(<8 x half> %bin.rdx)  {
; CHECK-SD-NOFP16-LABEL: add_H:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s2, s1
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fadd s0, s1, s0
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: add_H:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
; CHECK-SD-FP16-NEXT:    fadd h1, h2, h1
; CHECK-SD-FP16-NEXT:    mov h2, v0.h[4]
; CHECK-SD-FP16-NEXT:    fadd h1, h1, h3
; CHECK-SD-FP16-NEXT:    mov h3, v0.h[5]
; CHECK-SD-FP16-NEXT:    fadd h1, h1, h2
; CHECK-SD-FP16-NEXT:    mov h2, v0.h[6]
; CHECK-SD-FP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-FP16-NEXT:    fadd h1, h1, h3
; CHECK-SD-FP16-NEXT:    fadd h1, h1, h2
; CHECK-SD-FP16-NEXT:    fadd h0, h1, h0
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: add_H:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: add_H:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    mov h1, v0.h[2]
; CHECK-GI-FP16-NEXT:    faddp h2, v0.2h
; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
; CHECK-GI-FP16-NEXT:    fadd h1, h2, h1
; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
; CHECK-GI-FP16-NEXT:    fadd h1, h1, h3
; CHECK-GI-FP16-NEXT:    mov h3, v0.h[5]
; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-FP16-NEXT:    fadd h1, h1, h3
; CHECK-GI-FP16-NEXT:    fadd h1, h1, h2
; CHECK-GI-FP16-NEXT:    fadd h0, h1, h0
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fadd.f16.v8f16(half -0.0, <8 x half> %bin.rdx)
  ret half %r
}

; <4 x float>, start value -0.0: one expected output shared by all four
; RUN lines.
define float @add_S(<4 x float> %bin.rdx)  {
; CHECK-LABEL: add_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s1, v0.s[2]
; CHECK-NEXT:    faddp s2, v0.2s
; CHECK-NEXT:    mov s0, v0.s[3]
; CHECK-NEXT:    fadd s1, s2, s1
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
  ret float %r
}

; <2 x double>, start value -0.0: a single faddp.
define double @add_D(<2 x double> %bin.rdx)  {
; CHECK-LABEL: add_D:
; CHECK:       // %bb.0:
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
  %r = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %bin.rdx)
  ret double %r
}

; <16 x half>, start value -0.0: the operand occupies v0 and v1, and the
; expected code accumulates every lane of v0 before the lanes of v1.
define half @add_2H(<16 x half> %bin.rdx)  {
; CHECK-SD-NOFP16-LABEL: add_2H:
; CHECK-SD-NOFP16:       // %bb.0:
; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s3, s2
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-SD-NOFP16-NEXT:    fcvt h2, s2
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fadd s0, s2, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h1
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[2]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[3]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[4]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[5]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[6]
; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
; CHECK-SD-NOFP16-NEXT:    fadd s0, s0, s1
; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
; CHECK-SD-NOFP16-NEXT:    ret
;
; CHECK-SD-FP16-LABEL: add_2H:
; CHECK-SD-FP16:       // %bb.0:
; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
; CHECK-SD-FP16-NEXT:    faddp h3, v0.2h
; CHECK-SD-FP16-NEXT:    mov h4, v0.h[3]
; CHECK-SD-FP16-NEXT:    fadd h2, h3, h2
; CHECK-SD-FP16-NEXT:    mov h3, v0.h[4]
; CHECK-SD-FP16-NEXT:    fadd h2, h2, h4
; CHECK-SD-FP16-NEXT:    mov h4, v0.h[5]
; CHECK-SD-FP16-NEXT:    fadd h2, h2, h3
; CHECK-SD-FP16-NEXT:    mov h3, v0.h[6]
; CHECK-SD-FP16-NEXT:    mov h0, v0.h[7]
; CHECK-SD-FP16-NEXT:    fadd h2, h2, h4
; CHECK-SD-FP16-NEXT:    fadd h2, h2, h3
; CHECK-SD-FP16-NEXT:    mov h3, v1.h[2]
; CHECK-SD-FP16-NEXT:    fadd h0, h2, h0
; CHECK-SD-FP16-NEXT:    mov h2, v1.h[1]
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h1
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
; CHECK-SD-FP16-NEXT:    mov h2, v1.h[3]
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
; CHECK-SD-FP16-NEXT:    mov h3, v1.h[4]
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
; CHECK-SD-FP16-NEXT:    mov h2, v1.h[5]
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
; CHECK-SD-FP16-NEXT:    mov h3, v1.h[6]
; CHECK-SD-FP16-NEXT:    mov h1, v1.h[7]
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h2
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h3
; CHECK-SD-FP16-NEXT:    fadd h0, h0, h1
; CHECK-SD-FP16-NEXT:    ret
;
; CHECK-GI-NOFP16-LABEL: add_2H:
; CHECK-GI-NOFP16:       // %bb.0:
; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT:    fcvt h2, s2
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fadd s0, s2, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h1
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[2]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[3]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[4]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[5]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[6]
; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s2
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s1
; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
; CHECK-GI-NOFP16-NEXT:    ret
;
; CHECK-GI-FP16-LABEL: add_2H:
; CHECK-GI-FP16:       // %bb.0:
; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
; CHECK-GI-FP16-NEXT:    faddp h3, v0.2h
; CHECK-GI-FP16-NEXT:    mov h4, v0.h[3]
; CHECK-GI-FP16-NEXT:    fadd h2, h3, h2
; CHECK-GI-FP16-NEXT:    mov h3, v0.h[4]
; CHECK-GI-FP16-NEXT:    fadd h2, h2, h4
; CHECK-GI-FP16-NEXT:    mov h4, v0.h[5]
; CHECK-GI-FP16-NEXT:    fadd h2, h2, h3
; CHECK-GI-FP16-NEXT:    mov h3, v0.h[6]
; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
; CHECK-GI-FP16-NEXT:    fadd h2, h2, h4
; CHECK-GI-FP16-NEXT:    fadd h2, h2, h3
; CHECK-GI-FP16-NEXT:    mov h3, v1.h[2]
; CHECK-GI-FP16-NEXT:    fadd h0, h2, h0
; CHECK-GI-FP16-NEXT:    mov h2, v1.h[1]
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h1
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
; CHECK-GI-FP16-NEXT:    mov h2, v1.h[3]
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
; CHECK-GI-FP16-NEXT:    mov h3, v1.h[4]
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
; CHECK-GI-FP16-NEXT:    mov h2, v1.h[5]
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
; CHECK-GI-FP16-NEXT:    mov h3, v1.h[6]
; CHECK-GI-FP16-NEXT:    mov h1, v1.h[7]
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h2
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h3
; CHECK-GI-FP16-NEXT:    fadd h0, h0, h1
; CHECK-GI-FP16-NEXT:    ret
  %r = call half @llvm.vector.reduce.fadd.f16.v16f16(half -0.0, <16 x half> %bin.rdx)
  ret half %r
}

; <8 x float>, start value -0.0: two-register operand (v0, v1), with one
; expected output shared by all four RUN lines.
define float @add_2S(<8 x float> %bin.rdx)  {
; CHECK-LABEL: add_2S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s2, v0.s[2]
; CHECK-NEXT:    faddp s3, v0.2s
; CHECK-NEXT:    mov s0, v0.s[3]
; CHECK-NEXT:    fadd s2, s3, s2
; CHECK-NEXT:    mov s3, v1.s[2]
; CHECK-NEXT:    fadd s0, s2, s0
; CHECK-NEXT:    mov s2, v1.s[1]
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    mov s1, v1.s[3]
; CHECK-NEXT:    fadd s0, s0, s2
; CHECK-NEXT:    fadd s0, s0, s3
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
  ret float %r
}

; <4 x double>, start value -0.0: faddp on v0, then the two lanes of v1 are
; added one at a time.
define double @add_2D(<4 x double> %bin.rdx)  {
; CHECK-LABEL: add_2D:
; CHECK:       // %bb.0:
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    mov d2, v1.d[1]
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    fadd d0, d0, d2
; CHECK-NEXT:    ret
  %r = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %bin.rdx)
  ret double %r
}

; Added at least one test where the start value is not -0.0.
; <4 x float> reduction seeded with 42.0 (0x42280000): the expected code
; loads the start value into s1 and adds lane 0 to it before the other lanes.
define float @add_S_init_42(<4 x float> %bin.rdx)  {
; CHECK-LABEL: add_S_init_42:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
; CHECK-NEXT:    mov s2, v0.s[1]
; CHECK-NEXT:    mov s3, v0.s[2]
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fadd s1, s0, s1
; CHECK-NEXT:    mov s0, v0.s[3]
; CHECK-NEXT:    fadd s1, s1, s2
; CHECK-NEXT:    fadd s1, s1, s3
; CHECK-NEXT:    fadd s0, s1, s0
; CHECK-NEXT:    ret
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
  ret float %r
}

; Declarations of the fadd reduction intrinsics, one per element type and
; vector width.
; Function Attrs: nounwind readnone
declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)