1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 3; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE 4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 5; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE 6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 7; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \ 8; RUN: FileCheck %s --check-prefix=PWR10LE 9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 10; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \ 11; RUN: FileCheck %s --check-prefix=PWR10BE 12 13;; 14;; Vectors of f32 15;; 16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 { 17; PWR9LE-LABEL: v2f32: 18; PWR9LE: # %bb.0: # %entry 19; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 20; PWR9LE-NEXT: xxswapd vs1, v2 21; PWR9LE-NEXT: xscvspdpn f0, vs0 22; PWR9LE-NEXT: xscvspdpn f1, vs1 23; PWR9LE-NEXT: xsmulsp f1, f0, f1 24; PWR9LE-NEXT: blr 25; 26; PWR9BE-LABEL: v2f32: 27; PWR9BE: # %bb.0: # %entry 28; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 29; PWR9BE-NEXT: xscvspdpn f0, v2 30; PWR9BE-NEXT: xscvspdpn f1, vs1 31; PWR9BE-NEXT: xsmulsp f1, f0, f1 32; PWR9BE-NEXT: blr 33; 34; PWR10LE-LABEL: v2f32: 35; PWR10LE: # %bb.0: # %entry 36; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 37; PWR10LE-NEXT: xxswapd vs1, v2 38; PWR10LE-NEXT: xscvspdpn f0, vs0 39; PWR10LE-NEXT: xscvspdpn f1, vs1 40; PWR10LE-NEXT: xsmulsp f1, f0, f1 41; PWR10LE-NEXT: blr 42; 43; PWR10BE-LABEL: v2f32: 44; PWR10BE: # %bb.0: # %entry 45; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 46; PWR10BE-NEXT: xscvspdpn f0, v2 47; PWR10BE-NEXT: xscvspdpn f1, vs1 48; PWR10BE-NEXT: xsmulsp f1, f0, f1 49; PWR10BE-NEXT: blr 50entry: 51 %0 = call float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a) 52 ret float 
%0 53} 54 55define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 { 56; PWR9LE-LABEL: v2f32_b: 57; PWR9LE: # %bb.0: # %entry 58; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 59; PWR9LE-NEXT: xscvspdpn f0, vs0 60; PWR9LE-NEXT: xsmulsp f0, f1, f0 61; PWR9LE-NEXT: xxswapd vs1, v2 62; PWR9LE-NEXT: xscvspdpn f1, vs1 63; PWR9LE-NEXT: xsmulsp f1, f0, f1 64; PWR9LE-NEXT: blr 65; 66; PWR9BE-LABEL: v2f32_b: 67; PWR9BE: # %bb.0: # %entry 68; PWR9BE-NEXT: xscvspdpn f0, v2 69; PWR9BE-NEXT: xsmulsp f0, f1, f0 70; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 71; PWR9BE-NEXT: xscvspdpn f1, vs1 72; PWR9BE-NEXT: xsmulsp f1, f0, f1 73; PWR9BE-NEXT: blr 74; 75; PWR10LE-LABEL: v2f32_b: 76; PWR10LE: # %bb.0: # %entry 77; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 78; PWR10LE-NEXT: xscvspdpn f0, vs0 79; PWR10LE-NEXT: xsmulsp f0, f1, f0 80; PWR10LE-NEXT: xxswapd vs1, v2 81; PWR10LE-NEXT: xscvspdpn f1, vs1 82; PWR10LE-NEXT: xsmulsp f1, f0, f1 83; PWR10LE-NEXT: blr 84; 85; PWR10BE-LABEL: v2f32_b: 86; PWR10BE: # %bb.0: # %entry 87; PWR10BE-NEXT: xscvspdpn f0, v2 88; PWR10BE-NEXT: xsmulsp f0, f1, f0 89; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 90; PWR10BE-NEXT: xscvspdpn f1, vs1 91; PWR10BE-NEXT: xsmulsp f1, f0, f1 92; PWR10BE-NEXT: blr 93entry: 94 %0 = call float @llvm.vector.reduce.fmul.v2f32(float %b, <2 x float> %a) 95 ret float %0 96} 97 98define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 { 99; PWR9LE-LABEL: v2f32_fast: 100; PWR9LE: # %bb.0: # %entry 101; PWR9LE-NEXT: xxspltw vs0, v2, 2 102; PWR9LE-NEXT: xvmulsp vs0, v2, vs0 103; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 104; PWR9LE-NEXT: xscvspdpn f1, vs0 105; PWR9LE-NEXT: blr 106; 107; PWR9BE-LABEL: v2f32_fast: 108; PWR9BE: # %bb.0: # %entry 109; PWR9BE-NEXT: xxspltw vs0, v2, 1 110; PWR9BE-NEXT: xvmulsp vs0, v2, vs0 111; PWR9BE-NEXT: xscvspdpn f1, vs0 112; PWR9BE-NEXT: blr 113; 114; PWR10LE-LABEL: v2f32_fast: 115; PWR10LE: # %bb.0: # %entry 116; PWR10LE-NEXT: xxspltw vs0, v2, 2 117; PWR10LE-NEXT: xvmulsp vs0, v2, vs0 
118; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 119; PWR10LE-NEXT: xscvspdpn f1, vs0 120; PWR10LE-NEXT: blr 121; 122; PWR10BE-LABEL: v2f32_fast: 123; PWR10BE: # %bb.0: # %entry 124; PWR10BE-NEXT: xxspltw vs0, v2, 1 125; PWR10BE-NEXT: xvmulsp vs0, v2, vs0 126; PWR10BE-NEXT: xscvspdpn f1, vs0 127; PWR10BE-NEXT: blr 128entry: 129 %0 = call fast float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a) 130 ret float %0 131} 132 133define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 { 134; PWR9LE-LABEL: v4f32: 135; PWR9LE: # %bb.0: # %entry 136; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 137; PWR9LE-NEXT: xxswapd vs1, v2 138; PWR9LE-NEXT: xscvspdpn f0, vs0 139; PWR9LE-NEXT: xscvspdpn f1, vs1 140; PWR9LE-NEXT: xsmulsp f0, f0, f1 141; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 142; PWR9LE-NEXT: xscvspdpn f1, vs1 143; PWR9LE-NEXT: xsmulsp f0, f0, f1 144; PWR9LE-NEXT: xscvspdpn f1, v2 145; PWR9LE-NEXT: xsmulsp f1, f0, f1 146; PWR9LE-NEXT: blr 147; 148; PWR9BE-LABEL: v4f32: 149; PWR9BE: # %bb.0: # %entry 150; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 151; PWR9BE-NEXT: xscvspdpn f0, v2 152; PWR9BE-NEXT: xscvspdpn f1, vs1 153; PWR9BE-NEXT: xsmulsp f0, f0, f1 154; PWR9BE-NEXT: xxswapd vs1, v2 155; PWR9BE-NEXT: xscvspdpn f1, vs1 156; PWR9BE-NEXT: xsmulsp f0, f0, f1 157; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 158; PWR9BE-NEXT: xscvspdpn f1, vs1 159; PWR9BE-NEXT: xsmulsp f1, f0, f1 160; PWR9BE-NEXT: blr 161; 162; PWR10LE-LABEL: v4f32: 163; PWR10LE: # %bb.0: # %entry 164; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 165; PWR10LE-NEXT: xxswapd vs1, v2 166; PWR10LE-NEXT: xscvspdpn f0, vs0 167; PWR10LE-NEXT: xscvspdpn f1, vs1 168; PWR10LE-NEXT: xsmulsp f0, f0, f1 169; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 170; PWR10LE-NEXT: xscvspdpn f1, vs1 171; PWR10LE-NEXT: xsmulsp f0, f0, f1 172; PWR10LE-NEXT: xscvspdpn f1, v2 173; PWR10LE-NEXT: xsmulsp f1, f0, f1 174; PWR10LE-NEXT: blr 175; 176; PWR10BE-LABEL: v4f32: 177; PWR10BE: # %bb.0: # %entry 178; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 179; 
PWR10BE-NEXT: xscvspdpn f0, v2 180; PWR10BE-NEXT: xscvspdpn f1, vs1 181; PWR10BE-NEXT: xsmulsp f0, f0, f1 182; PWR10BE-NEXT: xxswapd vs1, v2 183; PWR10BE-NEXT: xscvspdpn f1, vs1 184; PWR10BE-NEXT: xsmulsp f0, f0, f1 185; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 186; PWR10BE-NEXT: xscvspdpn f1, vs1 187; PWR10BE-NEXT: xsmulsp f1, f0, f1 188; PWR10BE-NEXT: blr 189entry: 190 %0 = call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a) 191 ret float %0 192} 193 194define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 { 195; PWR9LE-LABEL: v4f32_b: 196; PWR9LE: # %bb.0: # %entry 197; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 198; PWR9LE-NEXT: xscvspdpn f0, vs0 199; PWR9LE-NEXT: xsmulsp f0, f1, f0 200; PWR9LE-NEXT: xxswapd vs1, v2 201; PWR9LE-NEXT: xscvspdpn f1, vs1 202; PWR9LE-NEXT: xsmulsp f0, f0, f1 203; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 204; PWR9LE-NEXT: xscvspdpn f1, vs1 205; PWR9LE-NEXT: xsmulsp f0, f0, f1 206; PWR9LE-NEXT: xscvspdpn f1, v2 207; PWR9LE-NEXT: xsmulsp f1, f0, f1 208; PWR9LE-NEXT: blr 209; 210; PWR9BE-LABEL: v4f32_b: 211; PWR9BE: # %bb.0: # %entry 212; PWR9BE-NEXT: xscvspdpn f0, v2 213; PWR9BE-NEXT: xsmulsp f0, f1, f0 214; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 215; PWR9BE-NEXT: xscvspdpn f1, vs1 216; PWR9BE-NEXT: xsmulsp f0, f0, f1 217; PWR9BE-NEXT: xxswapd vs1, v2 218; PWR9BE-NEXT: xscvspdpn f1, vs1 219; PWR9BE-NEXT: xsmulsp f0, f0, f1 220; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 221; PWR9BE-NEXT: xscvspdpn f1, vs1 222; PWR9BE-NEXT: xsmulsp f1, f0, f1 223; PWR9BE-NEXT: blr 224; 225; PWR10LE-LABEL: v4f32_b: 226; PWR10LE: # %bb.0: # %entry 227; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 228; PWR10LE-NEXT: xscvspdpn f0, vs0 229; PWR10LE-NEXT: xsmulsp f0, f1, f0 230; PWR10LE-NEXT: xxswapd vs1, v2 231; PWR10LE-NEXT: xscvspdpn f1, vs1 232; PWR10LE-NEXT: xsmulsp f0, f0, f1 233; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 234; PWR10LE-NEXT: xscvspdpn f1, vs1 235; PWR10LE-NEXT: xsmulsp f0, f0, f1 236; PWR10LE-NEXT: xscvspdpn f1, v2 237; 
PWR10LE-NEXT: xsmulsp f1, f0, f1 238; PWR10LE-NEXT: blr 239; 240; PWR10BE-LABEL: v4f32_b: 241; PWR10BE: # %bb.0: # %entry 242; PWR10BE-NEXT: xscvspdpn f0, v2 243; PWR10BE-NEXT: xsmulsp f0, f1, f0 244; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 245; PWR10BE-NEXT: xscvspdpn f1, vs1 246; PWR10BE-NEXT: xsmulsp f0, f0, f1 247; PWR10BE-NEXT: xxswapd vs1, v2 248; PWR10BE-NEXT: xscvspdpn f1, vs1 249; PWR10BE-NEXT: xsmulsp f0, f0, f1 250; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 251; PWR10BE-NEXT: xscvspdpn f1, vs1 252; PWR10BE-NEXT: xsmulsp f1, f0, f1 253; PWR10BE-NEXT: blr 254entry: 255 %0 = call float @llvm.vector.reduce.fmul.v4f32(float %b, <4 x float> %a) 256 ret float %0 257} 258 259define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 { 260; PWR9LE-LABEL: v4f32_fast: 261; PWR9LE: # %bb.0: # %entry 262; PWR9LE-NEXT: xxswapd v3, v2 263; PWR9LE-NEXT: xvmulsp vs0, v2, v3 264; PWR9LE-NEXT: xxspltw vs1, vs0, 2 265; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1 266; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 267; PWR9LE-NEXT: xscvspdpn f1, vs0 268; PWR9LE-NEXT: blr 269; 270; PWR9BE-LABEL: v4f32_fast: 271; PWR9BE: # %bb.0: # %entry 272; PWR9BE-NEXT: xxswapd v3, v2 273; PWR9BE-NEXT: xvmulsp vs0, v2, v3 274; PWR9BE-NEXT: xxspltw vs1, vs0, 1 275; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1 276; PWR9BE-NEXT: xscvspdpn f1, vs0 277; PWR9BE-NEXT: blr 278; 279; PWR10LE-LABEL: v4f32_fast: 280; PWR10LE: # %bb.0: # %entry 281; PWR10LE-NEXT: xxswapd v3, v2 282; PWR10LE-NEXT: xvmulsp vs0, v2, v3 283; PWR10LE-NEXT: xxspltw vs1, vs0, 2 284; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1 285; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 286; PWR10LE-NEXT: xscvspdpn f1, vs0 287; PWR10LE-NEXT: blr 288; 289; PWR10BE-LABEL: v4f32_fast: 290; PWR10BE: # %bb.0: # %entry 291; PWR10BE-NEXT: xxswapd v3, v2 292; PWR10BE-NEXT: xvmulsp vs0, v2, v3 293; PWR10BE-NEXT: xxspltw vs1, vs0, 1 294; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1 295; PWR10BE-NEXT: xscvspdpn f1, vs0 296; PWR10BE-NEXT: blr 297entry: 298 %0 = call fast float 
@llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a) 299 ret float %0 300} 301 302define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 { 303; PWR9LE-LABEL: v8f32: 304; PWR9LE: # %bb.0: # %entry 305; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 306; PWR9LE-NEXT: xxswapd vs1, v2 307; PWR9LE-NEXT: xscvspdpn f0, vs0 308; PWR9LE-NEXT: xscvspdpn f1, vs1 309; PWR9LE-NEXT: xsmulsp f0, f0, f1 310; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 311; PWR9LE-NEXT: xscvspdpn f1, vs1 312; PWR9LE-NEXT: xsmulsp f0, f0, f1 313; PWR9LE-NEXT: xscvspdpn f1, v2 314; PWR9LE-NEXT: xsmulsp f0, f0, f1 315; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3 316; PWR9LE-NEXT: xscvspdpn f1, vs1 317; PWR9LE-NEXT: xsmulsp f0, f0, f1 318; PWR9LE-NEXT: xxswapd vs1, v3 319; PWR9LE-NEXT: xscvspdpn f1, vs1 320; PWR9LE-NEXT: xsmulsp f0, f0, f1 321; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1 322; PWR9LE-NEXT: xscvspdpn f1, vs1 323; PWR9LE-NEXT: xsmulsp f0, f0, f1 324; PWR9LE-NEXT: xscvspdpn f1, v3 325; PWR9LE-NEXT: xsmulsp f1, f0, f1 326; PWR9LE-NEXT: blr 327; 328; PWR9BE-LABEL: v8f32: 329; PWR9BE: # %bb.0: # %entry 330; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 331; PWR9BE-NEXT: xscvspdpn f0, v2 332; PWR9BE-NEXT: xscvspdpn f1, vs1 333; PWR9BE-NEXT: xsmulsp f0, f0, f1 334; PWR9BE-NEXT: xxswapd vs1, v2 335; PWR9BE-NEXT: xscvspdpn f1, vs1 336; PWR9BE-NEXT: xsmulsp f0, f0, f1 337; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 338; PWR9BE-NEXT: xscvspdpn f1, vs1 339; PWR9BE-NEXT: xsmulsp f0, f0, f1 340; PWR9BE-NEXT: xscvspdpn f1, v3 341; PWR9BE-NEXT: xsmulsp f0, f0, f1 342; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1 343; PWR9BE-NEXT: xscvspdpn f1, vs1 344; PWR9BE-NEXT: xsmulsp f0, f0, f1 345; PWR9BE-NEXT: xxswapd vs1, v3 346; PWR9BE-NEXT: xscvspdpn f1, vs1 347; PWR9BE-NEXT: xsmulsp f0, f0, f1 348; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3 349; PWR9BE-NEXT: xscvspdpn f1, vs1 350; PWR9BE-NEXT: xsmulsp f1, f0, f1 351; PWR9BE-NEXT: blr 352; 353; PWR10LE-LABEL: v8f32: 354; PWR10LE: # %bb.0: # %entry 355; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 356; 
PWR10LE-NEXT: xxswapd vs1, v2 357; PWR10LE-NEXT: xscvspdpn f0, vs0 358; PWR10LE-NEXT: xscvspdpn f1, vs1 359; PWR10LE-NEXT: xsmulsp f0, f0, f1 360; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 361; PWR10LE-NEXT: xscvspdpn f1, vs1 362; PWR10LE-NEXT: xsmulsp f0, f0, f1 363; PWR10LE-NEXT: xscvspdpn f1, v2 364; PWR10LE-NEXT: xsmulsp f0, f0, f1 365; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3 366; PWR10LE-NEXT: xscvspdpn f1, vs1 367; PWR10LE-NEXT: xsmulsp f0, f0, f1 368; PWR10LE-NEXT: xxswapd vs1, v3 369; PWR10LE-NEXT: xscvspdpn f1, vs1 370; PWR10LE-NEXT: xsmulsp f0, f0, f1 371; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1 372; PWR10LE-NEXT: xscvspdpn f1, vs1 373; PWR10LE-NEXT: xsmulsp f0, f0, f1 374; PWR10LE-NEXT: xscvspdpn f1, v3 375; PWR10LE-NEXT: xsmulsp f1, f0, f1 376; PWR10LE-NEXT: blr 377; 378; PWR10BE-LABEL: v8f32: 379; PWR10BE: # %bb.0: # %entry 380; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 381; PWR10BE-NEXT: xscvspdpn f0, v2 382; PWR10BE-NEXT: xscvspdpn f1, vs1 383; PWR10BE-NEXT: xsmulsp f0, f0, f1 384; PWR10BE-NEXT: xxswapd vs1, v2 385; PWR10BE-NEXT: xscvspdpn f1, vs1 386; PWR10BE-NEXT: xsmulsp f0, f0, f1 387; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 388; PWR10BE-NEXT: xscvspdpn f1, vs1 389; PWR10BE-NEXT: xsmulsp f0, f0, f1 390; PWR10BE-NEXT: xscvspdpn f1, v3 391; PWR10BE-NEXT: xsmulsp f0, f0, f1 392; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1 393; PWR10BE-NEXT: xscvspdpn f1, vs1 394; PWR10BE-NEXT: xsmulsp f0, f0, f1 395; PWR10BE-NEXT: xxswapd vs1, v3 396; PWR10BE-NEXT: xscvspdpn f1, vs1 397; PWR10BE-NEXT: xsmulsp f0, f0, f1 398; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3 399; PWR10BE-NEXT: xscvspdpn f1, vs1 400; PWR10BE-NEXT: xsmulsp f1, f0, f1 401; PWR10BE-NEXT: blr 402entry: 403 %0 = call float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a) 404 ret float %0 405} 406 407define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 { 408; PWR9LE-LABEL: v8f32_b: 409; PWR9LE: # %bb.0: # %entry 410; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 411; PWR9LE-NEXT: xscvspdpn f0, 
vs0 412; PWR9LE-NEXT: xsmulsp f0, f1, f0 413; PWR9LE-NEXT: xxswapd vs1, v2 414; PWR9LE-NEXT: xscvspdpn f1, vs1 415; PWR9LE-NEXT: xsmulsp f0, f0, f1 416; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 417; PWR9LE-NEXT: xscvspdpn f1, vs1 418; PWR9LE-NEXT: xsmulsp f0, f0, f1 419; PWR9LE-NEXT: xscvspdpn f1, v2 420; PWR9LE-NEXT: xsmulsp f0, f0, f1 421; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3 422; PWR9LE-NEXT: xscvspdpn f1, vs1 423; PWR9LE-NEXT: xsmulsp f0, f0, f1 424; PWR9LE-NEXT: xxswapd vs1, v3 425; PWR9LE-NEXT: xscvspdpn f1, vs1 426; PWR9LE-NEXT: xsmulsp f0, f0, f1 427; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1 428; PWR9LE-NEXT: xscvspdpn f1, vs1 429; PWR9LE-NEXT: xsmulsp f0, f0, f1 430; PWR9LE-NEXT: xscvspdpn f1, v3 431; PWR9LE-NEXT: xsmulsp f1, f0, f1 432; PWR9LE-NEXT: blr 433; 434; PWR9BE-LABEL: v8f32_b: 435; PWR9BE: # %bb.0: # %entry 436; PWR9BE-NEXT: xscvspdpn f0, v2 437; PWR9BE-NEXT: xsmulsp f0, f1, f0 438; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 439; PWR9BE-NEXT: xscvspdpn f1, vs1 440; PWR9BE-NEXT: xsmulsp f0, f0, f1 441; PWR9BE-NEXT: xxswapd vs1, v2 442; PWR9BE-NEXT: xscvspdpn f1, vs1 443; PWR9BE-NEXT: xsmulsp f0, f0, f1 444; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 445; PWR9BE-NEXT: xscvspdpn f1, vs1 446; PWR9BE-NEXT: xsmulsp f0, f0, f1 447; PWR9BE-NEXT: xscvspdpn f1, v3 448; PWR9BE-NEXT: xsmulsp f0, f0, f1 449; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1 450; PWR9BE-NEXT: xscvspdpn f1, vs1 451; PWR9BE-NEXT: xsmulsp f0, f0, f1 452; PWR9BE-NEXT: xxswapd vs1, v3 453; PWR9BE-NEXT: xscvspdpn f1, vs1 454; PWR9BE-NEXT: xsmulsp f0, f0, f1 455; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3 456; PWR9BE-NEXT: xscvspdpn f1, vs1 457; PWR9BE-NEXT: xsmulsp f1, f0, f1 458; PWR9BE-NEXT: blr 459; 460; PWR10LE-LABEL: v8f32_b: 461; PWR10LE: # %bb.0: # %entry 462; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 463; PWR10LE-NEXT: xscvspdpn f0, vs0 464; PWR10LE-NEXT: xsmulsp f0, f1, f0 465; PWR10LE-NEXT: xxswapd vs1, v2 466; PWR10LE-NEXT: xscvspdpn f1, vs1 467; PWR10LE-NEXT: xsmulsp f0, f0, f1 468; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 
469; PWR10LE-NEXT: xscvspdpn f1, vs1 470; PWR10LE-NEXT: xsmulsp f0, f0, f1 471; PWR10LE-NEXT: xscvspdpn f1, v2 472; PWR10LE-NEXT: xsmulsp f0, f0, f1 473; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3 474; PWR10LE-NEXT: xscvspdpn f1, vs1 475; PWR10LE-NEXT: xsmulsp f0, f0, f1 476; PWR10LE-NEXT: xxswapd vs1, v3 477; PWR10LE-NEXT: xscvspdpn f1, vs1 478; PWR10LE-NEXT: xsmulsp f0, f0, f1 479; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1 480; PWR10LE-NEXT: xscvspdpn f1, vs1 481; PWR10LE-NEXT: xsmulsp f0, f0, f1 482; PWR10LE-NEXT: xscvspdpn f1, v3 483; PWR10LE-NEXT: xsmulsp f1, f0, f1 484; PWR10LE-NEXT: blr 485; 486; PWR10BE-LABEL: v8f32_b: 487; PWR10BE: # %bb.0: # %entry 488; PWR10BE-NEXT: xscvspdpn f0, v2 489; PWR10BE-NEXT: xsmulsp f0, f1, f0 490; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 491; PWR10BE-NEXT: xscvspdpn f1, vs1 492; PWR10BE-NEXT: xsmulsp f0, f0, f1 493; PWR10BE-NEXT: xxswapd vs1, v2 494; PWR10BE-NEXT: xscvspdpn f1, vs1 495; PWR10BE-NEXT: xsmulsp f0, f0, f1 496; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 497; PWR10BE-NEXT: xscvspdpn f1, vs1 498; PWR10BE-NEXT: xsmulsp f0, f0, f1 499; PWR10BE-NEXT: xscvspdpn f1, v3 500; PWR10BE-NEXT: xsmulsp f0, f0, f1 501; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1 502; PWR10BE-NEXT: xscvspdpn f1, vs1 503; PWR10BE-NEXT: xsmulsp f0, f0, f1 504; PWR10BE-NEXT: xxswapd vs1, v3 505; PWR10BE-NEXT: xscvspdpn f1, vs1 506; PWR10BE-NEXT: xsmulsp f0, f0, f1 507; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3 508; PWR10BE-NEXT: xscvspdpn f1, vs1 509; PWR10BE-NEXT: xsmulsp f1, f0, f1 510; PWR10BE-NEXT: blr 511entry: 512 %0 = call float @llvm.vector.reduce.fmul.v8f32(float %b, <8 x float> %a) 513 ret float %0 514} 515 516define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 { 517; PWR9LE-LABEL: v8f32_fast: 518; PWR9LE: # %bb.0: # %entry 519; PWR9LE-NEXT: xvmulsp vs0, v2, v3 520; PWR9LE-NEXT: xxswapd v2, vs0 521; PWR9LE-NEXT: xvmulsp vs0, vs0, v2 522; PWR9LE-NEXT: xxspltw vs1, vs0, 2 523; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1 524; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 
525; PWR9LE-NEXT: xscvspdpn f1, vs0 526; PWR9LE-NEXT: blr 527; 528; PWR9BE-LABEL: v8f32_fast: 529; PWR9BE: # %bb.0: # %entry 530; PWR9BE-NEXT: xvmulsp vs0, v2, v3 531; PWR9BE-NEXT: xxswapd v2, vs0 532; PWR9BE-NEXT: xvmulsp vs0, vs0, v2 533; PWR9BE-NEXT: xxspltw vs1, vs0, 1 534; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1 535; PWR9BE-NEXT: xscvspdpn f1, vs0 536; PWR9BE-NEXT: blr 537; 538; PWR10LE-LABEL: v8f32_fast: 539; PWR10LE: # %bb.0: # %entry 540; PWR10LE-NEXT: xvmulsp vs0, v2, v3 541; PWR10LE-NEXT: xxswapd v2, vs0 542; PWR10LE-NEXT: xvmulsp vs0, vs0, v2 543; PWR10LE-NEXT: xxspltw vs1, vs0, 2 544; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1 545; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 546; PWR10LE-NEXT: xscvspdpn f1, vs0 547; PWR10LE-NEXT: blr 548; 549; PWR10BE-LABEL: v8f32_fast: 550; PWR10BE: # %bb.0: # %entry 551; PWR10BE-NEXT: xvmulsp vs0, v2, v3 552; PWR10BE-NEXT: xxswapd v2, vs0 553; PWR10BE-NEXT: xvmulsp vs0, vs0, v2 554; PWR10BE-NEXT: xxspltw vs1, vs0, 1 555; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1 556; PWR10BE-NEXT: xscvspdpn f1, vs0 557; PWR10BE-NEXT: blr 558entry: 559 %0 = call fast float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a) 560 ret float %0 561} 562 563define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 { 564; PWR9LE-LABEL: v16f32: 565; PWR9LE: # %bb.0: # %entry 566; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 567; PWR9LE-NEXT: xxswapd vs1, v2 568; PWR9LE-NEXT: xscvspdpn f0, vs0 569; PWR9LE-NEXT: xscvspdpn f1, vs1 570; PWR9LE-NEXT: xsmulsp f0, f0, f1 571; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 572; PWR9LE-NEXT: xscvspdpn f1, vs1 573; PWR9LE-NEXT: xsmulsp f0, f0, f1 574; PWR9LE-NEXT: xscvspdpn f1, v2 575; PWR9LE-NEXT: xsmulsp f0, f0, f1 576; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3 577; PWR9LE-NEXT: xscvspdpn f1, vs1 578; PWR9LE-NEXT: xsmulsp f0, f0, f1 579; PWR9LE-NEXT: xxswapd vs1, v3 580; PWR9LE-NEXT: xscvspdpn f1, vs1 581; PWR9LE-NEXT: xsmulsp f0, f0, f1 582; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1 583; PWR9LE-NEXT: xscvspdpn f1, vs1 584; 
PWR9LE-NEXT: xsmulsp f0, f0, f1 585; PWR9LE-NEXT: xscvspdpn f1, v3 586; PWR9LE-NEXT: xsmulsp f0, f0, f1 587; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3 588; PWR9LE-NEXT: xscvspdpn f1, vs1 589; PWR9LE-NEXT: xsmulsp f0, f0, f1 590; PWR9LE-NEXT: xxswapd vs1, v4 591; PWR9LE-NEXT: xscvspdpn f1, vs1 592; PWR9LE-NEXT: xsmulsp f0, f0, f1 593; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1 594; PWR9LE-NEXT: xscvspdpn f1, vs1 595; PWR9LE-NEXT: xsmulsp f0, f0, f1 596; PWR9LE-NEXT: xscvspdpn f1, v4 597; PWR9LE-NEXT: xsmulsp f0, f0, f1 598; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3 599; PWR9LE-NEXT: xscvspdpn f1, vs1 600; PWR9LE-NEXT: xsmulsp f0, f0, f1 601; PWR9LE-NEXT: xxswapd vs1, v5 602; PWR9LE-NEXT: xscvspdpn f1, vs1 603; PWR9LE-NEXT: xsmulsp f0, f0, f1 604; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1 605; PWR9LE-NEXT: xscvspdpn f1, vs1 606; PWR9LE-NEXT: xsmulsp f0, f0, f1 607; PWR9LE-NEXT: xscvspdpn f1, v5 608; PWR9LE-NEXT: xsmulsp f1, f0, f1 609; PWR9LE-NEXT: blr 610; 611; PWR9BE-LABEL: v16f32: 612; PWR9BE: # %bb.0: # %entry 613; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 614; PWR9BE-NEXT: xscvspdpn f0, v2 615; PWR9BE-NEXT: xscvspdpn f1, vs1 616; PWR9BE-NEXT: xsmulsp f0, f0, f1 617; PWR9BE-NEXT: xxswapd vs1, v2 618; PWR9BE-NEXT: xscvspdpn f1, vs1 619; PWR9BE-NEXT: xsmulsp f0, f0, f1 620; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 621; PWR9BE-NEXT: xscvspdpn f1, vs1 622; PWR9BE-NEXT: xsmulsp f0, f0, f1 623; PWR9BE-NEXT: xscvspdpn f1, v3 624; PWR9BE-NEXT: xsmulsp f0, f0, f1 625; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1 626; PWR9BE-NEXT: xscvspdpn f1, vs1 627; PWR9BE-NEXT: xsmulsp f0, f0, f1 628; PWR9BE-NEXT: xxswapd vs1, v3 629; PWR9BE-NEXT: xscvspdpn f1, vs1 630; PWR9BE-NEXT: xsmulsp f0, f0, f1 631; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3 632; PWR9BE-NEXT: xscvspdpn f1, vs1 633; PWR9BE-NEXT: xsmulsp f0, f0, f1 634; PWR9BE-NEXT: xscvspdpn f1, v4 635; PWR9BE-NEXT: xsmulsp f0, f0, f1 636; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1 637; PWR9BE-NEXT: xscvspdpn f1, vs1 638; PWR9BE-NEXT: xsmulsp f0, f0, f1 639; PWR9BE-NEXT: xxswapd vs1, v4 
640; PWR9BE-NEXT: xscvspdpn f1, vs1 641; PWR9BE-NEXT: xsmulsp f0, f0, f1 642; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3 643; PWR9BE-NEXT: xscvspdpn f1, vs1 644; PWR9BE-NEXT: xsmulsp f0, f0, f1 645; PWR9BE-NEXT: xscvspdpn f1, v5 646; PWR9BE-NEXT: xsmulsp f0, f0, f1 647; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1 648; PWR9BE-NEXT: xscvspdpn f1, vs1 649; PWR9BE-NEXT: xsmulsp f0, f0, f1 650; PWR9BE-NEXT: xxswapd vs1, v5 651; PWR9BE-NEXT: xscvspdpn f1, vs1 652; PWR9BE-NEXT: xsmulsp f0, f0, f1 653; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3 654; PWR9BE-NEXT: xscvspdpn f1, vs1 655; PWR9BE-NEXT: xsmulsp f1, f0, f1 656; PWR9BE-NEXT: blr 657; 658; PWR10LE-LABEL: v16f32: 659; PWR10LE: # %bb.0: # %entry 660; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 661; PWR10LE-NEXT: xxswapd vs1, v2 662; PWR10LE-NEXT: xscvspdpn f0, vs0 663; PWR10LE-NEXT: xscvspdpn f1, vs1 664; PWR10LE-NEXT: xsmulsp f0, f0, f1 665; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 666; PWR10LE-NEXT: xscvspdpn f1, vs1 667; PWR10LE-NEXT: xsmulsp f0, f0, f1 668; PWR10LE-NEXT: xscvspdpn f1, v2 669; PWR10LE-NEXT: xsmulsp f0, f0, f1 670; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3 671; PWR10LE-NEXT: xscvspdpn f1, vs1 672; PWR10LE-NEXT: xsmulsp f0, f0, f1 673; PWR10LE-NEXT: xxswapd vs1, v3 674; PWR10LE-NEXT: xscvspdpn f1, vs1 675; PWR10LE-NEXT: xsmulsp f0, f0, f1 676; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1 677; PWR10LE-NEXT: xscvspdpn f1, vs1 678; PWR10LE-NEXT: xsmulsp f0, f0, f1 679; PWR10LE-NEXT: xscvspdpn f1, v3 680; PWR10LE-NEXT: xsmulsp f0, f0, f1 681; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3 682; PWR10LE-NEXT: xscvspdpn f1, vs1 683; PWR10LE-NEXT: xsmulsp f0, f0, f1 684; PWR10LE-NEXT: xxswapd vs1, v4 685; PWR10LE-NEXT: xscvspdpn f1, vs1 686; PWR10LE-NEXT: xsmulsp f0, f0, f1 687; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1 688; PWR10LE-NEXT: xscvspdpn f1, vs1 689; PWR10LE-NEXT: xsmulsp f0, f0, f1 690; PWR10LE-NEXT: xscvspdpn f1, v4 691; PWR10LE-NEXT: xsmulsp f0, f0, f1 692; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3 693; PWR10LE-NEXT: xscvspdpn f1, vs1 694; PWR10LE-NEXT: 
xsmulsp f0, f0, f1 695; PWR10LE-NEXT: xxswapd vs1, v5 696; PWR10LE-NEXT: xscvspdpn f1, vs1 697; PWR10LE-NEXT: xsmulsp f0, f0, f1 698; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1 699; PWR10LE-NEXT: xscvspdpn f1, vs1 700; PWR10LE-NEXT: xsmulsp f0, f0, f1 701; PWR10LE-NEXT: xscvspdpn f1, v5 702; PWR10LE-NEXT: xsmulsp f1, f0, f1 703; PWR10LE-NEXT: blr 704; 705; PWR10BE-LABEL: v16f32: 706; PWR10BE: # %bb.0: # %entry 707; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 708; PWR10BE-NEXT: xscvspdpn f0, v2 709; PWR10BE-NEXT: xscvspdpn f1, vs1 710; PWR10BE-NEXT: xsmulsp f0, f0, f1 711; PWR10BE-NEXT: xxswapd vs1, v2 712; PWR10BE-NEXT: xscvspdpn f1, vs1 713; PWR10BE-NEXT: xsmulsp f0, f0, f1 714; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 715; PWR10BE-NEXT: xscvspdpn f1, vs1 716; PWR10BE-NEXT: xsmulsp f0, f0, f1 717; PWR10BE-NEXT: xscvspdpn f1, v3 718; PWR10BE-NEXT: xsmulsp f0, f0, f1 719; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1 720; PWR10BE-NEXT: xscvspdpn f1, vs1 721; PWR10BE-NEXT: xsmulsp f0, f0, f1 722; PWR10BE-NEXT: xxswapd vs1, v3 723; PWR10BE-NEXT: xscvspdpn f1, vs1 724; PWR10BE-NEXT: xsmulsp f0, f0, f1 725; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3 726; PWR10BE-NEXT: xscvspdpn f1, vs1 727; PWR10BE-NEXT: xsmulsp f0, f0, f1 728; PWR10BE-NEXT: xscvspdpn f1, v4 729; PWR10BE-NEXT: xsmulsp f0, f0, f1 730; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1 731; PWR10BE-NEXT: xscvspdpn f1, vs1 732; PWR10BE-NEXT: xsmulsp f0, f0, f1 733; PWR10BE-NEXT: xxswapd vs1, v4 734; PWR10BE-NEXT: xscvspdpn f1, vs1 735; PWR10BE-NEXT: xsmulsp f0, f0, f1 736; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3 737; PWR10BE-NEXT: xscvspdpn f1, vs1 738; PWR10BE-NEXT: xsmulsp f0, f0, f1 739; PWR10BE-NEXT: xscvspdpn f1, v5 740; PWR10BE-NEXT: xsmulsp f0, f0, f1 741; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1 742; PWR10BE-NEXT: xscvspdpn f1, vs1 743; PWR10BE-NEXT: xsmulsp f0, f0, f1 744; PWR10BE-NEXT: xxswapd vs1, v5 745; PWR10BE-NEXT: xscvspdpn f1, vs1 746; PWR10BE-NEXT: xsmulsp f0, f0, f1 747; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3 748; PWR10BE-NEXT: xscvspdpn f1, 
vs1 749; PWR10BE-NEXT: xsmulsp f1, f0, f1 750; PWR10BE-NEXT: blr 751entry: 752 %0 = call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a) 753 ret float %0 754} 755 756define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 { 757; PWR9LE-LABEL: v16f32_b: 758; PWR9LE: # %bb.0: # %entry 759; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 760; PWR9LE-NEXT: xscvspdpn f0, vs0 761; PWR9LE-NEXT: xsmulsp f0, f1, f0 762; PWR9LE-NEXT: xxswapd vs1, v2 763; PWR9LE-NEXT: xscvspdpn f1, vs1 764; PWR9LE-NEXT: xsmulsp f0, f0, f1 765; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 766; PWR9LE-NEXT: xscvspdpn f1, vs1 767; PWR9LE-NEXT: xsmulsp f0, f0, f1 768; PWR9LE-NEXT: xscvspdpn f1, v2 769; PWR9LE-NEXT: xsmulsp f0, f0, f1 770; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3 771; PWR9LE-NEXT: xscvspdpn f1, vs1 772; PWR9LE-NEXT: xsmulsp f0, f0, f1 773; PWR9LE-NEXT: xxswapd vs1, v3 774; PWR9LE-NEXT: xscvspdpn f1, vs1 775; PWR9LE-NEXT: xsmulsp f0, f0, f1 776; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1 777; PWR9LE-NEXT: xscvspdpn f1, vs1 778; PWR9LE-NEXT: xsmulsp f0, f0, f1 779; PWR9LE-NEXT: xscvspdpn f1, v3 780; PWR9LE-NEXT: xsmulsp f0, f0, f1 781; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3 782; PWR9LE-NEXT: xscvspdpn f1, vs1 783; PWR9LE-NEXT: xsmulsp f0, f0, f1 784; PWR9LE-NEXT: xxswapd vs1, v4 785; PWR9LE-NEXT: xscvspdpn f1, vs1 786; PWR9LE-NEXT: xsmulsp f0, f0, f1 787; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1 788; PWR9LE-NEXT: xscvspdpn f1, vs1 789; PWR9LE-NEXT: xsmulsp f0, f0, f1 790; PWR9LE-NEXT: xscvspdpn f1, v4 791; PWR9LE-NEXT: xsmulsp f0, f0, f1 792; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3 793; PWR9LE-NEXT: xscvspdpn f1, vs1 794; PWR9LE-NEXT: xsmulsp f0, f0, f1 795; PWR9LE-NEXT: xxswapd vs1, v5 796; PWR9LE-NEXT: xscvspdpn f1, vs1 797; PWR9LE-NEXT: xsmulsp f0, f0, f1 798; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1 799; PWR9LE-NEXT: xscvspdpn f1, vs1 800; PWR9LE-NEXT: xsmulsp f0, f0, f1 801; PWR9LE-NEXT: xscvspdpn f1, v5 802; PWR9LE-NEXT: xsmulsp f1, f0, f1 803; PWR9LE-NEXT: blr 804; 805; 
PWR9BE-LABEL: v16f32_b: 806; PWR9BE: # %bb.0: # %entry 807; PWR9BE-NEXT: xscvspdpn f0, v2 808; PWR9BE-NEXT: xsmulsp f0, f1, f0 809; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 810; PWR9BE-NEXT: xscvspdpn f1, vs1 811; PWR9BE-NEXT: xsmulsp f0, f0, f1 812; PWR9BE-NEXT: xxswapd vs1, v2 813; PWR9BE-NEXT: xscvspdpn f1, vs1 814; PWR9BE-NEXT: xsmulsp f0, f0, f1 815; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 816; PWR9BE-NEXT: xscvspdpn f1, vs1 817; PWR9BE-NEXT: xsmulsp f0, f0, f1 818; PWR9BE-NEXT: xscvspdpn f1, v3 819; PWR9BE-NEXT: xsmulsp f0, f0, f1 820; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1 821; PWR9BE-NEXT: xscvspdpn f1, vs1 822; PWR9BE-NEXT: xsmulsp f0, f0, f1 823; PWR9BE-NEXT: xxswapd vs1, v3 824; PWR9BE-NEXT: xscvspdpn f1, vs1 825; PWR9BE-NEXT: xsmulsp f0, f0, f1 826; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3 827; PWR9BE-NEXT: xscvspdpn f1, vs1 828; PWR9BE-NEXT: xsmulsp f0, f0, f1 829; PWR9BE-NEXT: xscvspdpn f1, v4 830; PWR9BE-NEXT: xsmulsp f0, f0, f1 831; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1 832; PWR9BE-NEXT: xscvspdpn f1, vs1 833; PWR9BE-NEXT: xsmulsp f0, f0, f1 834; PWR9BE-NEXT: xxswapd vs1, v4 835; PWR9BE-NEXT: xscvspdpn f1, vs1 836; PWR9BE-NEXT: xsmulsp f0, f0, f1 837; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3 838; PWR9BE-NEXT: xscvspdpn f1, vs1 839; PWR9BE-NEXT: xsmulsp f0, f0, f1 840; PWR9BE-NEXT: xscvspdpn f1, v5 841; PWR9BE-NEXT: xsmulsp f0, f0, f1 842; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1 843; PWR9BE-NEXT: xscvspdpn f1, vs1 844; PWR9BE-NEXT: xsmulsp f0, f0, f1 845; PWR9BE-NEXT: xxswapd vs1, v5 846; PWR9BE-NEXT: xscvspdpn f1, vs1 847; PWR9BE-NEXT: xsmulsp f0, f0, f1 848; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3 849; PWR9BE-NEXT: xscvspdpn f1, vs1 850; PWR9BE-NEXT: xsmulsp f1, f0, f1 851; PWR9BE-NEXT: blr 852; 853; PWR10LE-LABEL: v16f32_b: 854; PWR10LE: # %bb.0: # %entry 855; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 856; PWR10LE-NEXT: xscvspdpn f0, vs0 857; PWR10LE-NEXT: xsmulsp f0, f1, f0 858; PWR10LE-NEXT: xxswapd vs1, v2 859; PWR10LE-NEXT: xscvspdpn f1, vs1 860; PWR10LE-NEXT: xsmulsp f0, f0, f1 
861; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 862; PWR10LE-NEXT: xscvspdpn f1, vs1 863; PWR10LE-NEXT: xsmulsp f0, f0, f1 864; PWR10LE-NEXT: xscvspdpn f1, v2 865; PWR10LE-NEXT: xsmulsp f0, f0, f1 866; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3 867; PWR10LE-NEXT: xscvspdpn f1, vs1 868; PWR10LE-NEXT: xsmulsp f0, f0, f1 869; PWR10LE-NEXT: xxswapd vs1, v3 870; PWR10LE-NEXT: xscvspdpn f1, vs1 871; PWR10LE-NEXT: xsmulsp f0, f0, f1 872; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1 873; PWR10LE-NEXT: xscvspdpn f1, vs1 874; PWR10LE-NEXT: xsmulsp f0, f0, f1 875; PWR10LE-NEXT: xscvspdpn f1, v3 876; PWR10LE-NEXT: xsmulsp f0, f0, f1 877; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3 878; PWR10LE-NEXT: xscvspdpn f1, vs1 879; PWR10LE-NEXT: xsmulsp f0, f0, f1 880; PWR10LE-NEXT: xxswapd vs1, v4 881; PWR10LE-NEXT: xscvspdpn f1, vs1 882; PWR10LE-NEXT: xsmulsp f0, f0, f1 883; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1 884; PWR10LE-NEXT: xscvspdpn f1, vs1 885; PWR10LE-NEXT: xsmulsp f0, f0, f1 886; PWR10LE-NEXT: xscvspdpn f1, v4 887; PWR10LE-NEXT: xsmulsp f0, f0, f1 888; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3 889; PWR10LE-NEXT: xscvspdpn f1, vs1 890; PWR10LE-NEXT: xsmulsp f0, f0, f1 891; PWR10LE-NEXT: xxswapd vs1, v5 892; PWR10LE-NEXT: xscvspdpn f1, vs1 893; PWR10LE-NEXT: xsmulsp f0, f0, f1 894; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1 895; PWR10LE-NEXT: xscvspdpn f1, vs1 896; PWR10LE-NEXT: xsmulsp f0, f0, f1 897; PWR10LE-NEXT: xscvspdpn f1, v5 898; PWR10LE-NEXT: xsmulsp f1, f0, f1 899; PWR10LE-NEXT: blr 900; 901; PWR10BE-LABEL: v16f32_b: 902; PWR10BE: # %bb.0: # %entry 903; PWR10BE-NEXT: xscvspdpn f0, v2 904; PWR10BE-NEXT: xsmulsp f0, f1, f0 905; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 906; PWR10BE-NEXT: xscvspdpn f1, vs1 907; PWR10BE-NEXT: xsmulsp f0, f0, f1 908; PWR10BE-NEXT: xxswapd vs1, v2 909; PWR10BE-NEXT: xscvspdpn f1, vs1 910; PWR10BE-NEXT: xsmulsp f0, f0, f1 911; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 912; PWR10BE-NEXT: xscvspdpn f1, vs1 913; PWR10BE-NEXT: xsmulsp f0, f0, f1 914; PWR10BE-NEXT: xscvspdpn f1, v3 915; 
PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v16f32(float %b, <16 x float> %a)
  ret float %0
}

; fmul reduction of <16 x float>, called with the `fast` flag and a start
; value of 1.0. The expected assembly for every run configuration uses vector
; xvmulsp instructions instead of a chain of scalar xsmulsp instructions.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmulsp vs0, v3, v5
; PWR9LE-NEXT: xvmulsp vs1, v2, v4
; PWR9LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvmulsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmulsp vs0, v3, v5
; PWR9BE-NEXT: xvmulsp vs1, v2, v4
; PWR9BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvmulsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmulsp vs0, v3, v5
; PWR10LE-NEXT: xvmulsp vs1, v2, v4
; PWR10LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvmulsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmulsp vs0, v3, v5
; PWR10BE-NEXT: xvmulsp vs1, v2, v4
; PWR10BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvmulsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
  ret float %0
}

; Declarations of the f32 fmul reduction intrinsics, one per vector width
; (2, 4, 8 and 16 elements). Each takes a scalar start value and a vector.
declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>) #0
declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) #0
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) #0
declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>) #0

;;
;; Vectors of f64
;;
; fmul reduction of <2 x double> with no fast-math flags and a start value of
; 1.0. The expected assembly is one xxswapd followed by one scalar xsmuldp.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xsmuldp f1, v2, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xsmuldp f1, v2, f0
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
  ret double %0
}

; Same reduction as @v2f64, but the start value is the scalar argument %b, so
; the expected assembly contains one additional xsmuldp that reads f1.
define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double %b, <2 x double> %a)
  ret double %0
}

; fmul reduction of <2 x double> called with the `fast` flag and a start value
; of 1.0. The expected assembly uses the vector xvmuldp instruction.
define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xvmuldp vs0, v2, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xvmuldp vs1, v2, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xvmuldp vs0, v2, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xvmuldp vs1, v2, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
  ret double %0
}

; fmul reduction of <4 x double> with no fast-math flags and a start value of
; 1.0. The expected assembly is a chain of scalar xsmuldp instructions over
; v2 and v3.
define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
  ret double %0
}

; Same reduction as @v4f64, but the start value is the scalar argument %b, so
; the expected xsmuldp chain begins with a multiply that reads f1.
define dso_local double @v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v4f64(double %b, <4 x double> %a)
  ret double %0
}

; fmul reduction of <4 x double> called with the `fast` flag and a start value
; of 1.0. The expected assembly multiplies v2 and v3 with vector xvmuldp
; instructions.
define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v2, v3
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v2, v3
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v2, v3
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v2, v3
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
  ret double %0
}

; fmul reduction of <8 x double> with no fast-math flags and a start value of
; 1.0. The expected assembly is a chain of scalar xsmuldp instructions over
; v2 through v5.
define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
  ret double %0
}

; Same reduction as @v8f64, but the start value is the scalar argument %b, so
; the expected xsmuldp chain begins with a multiply that reads f1.
define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v8f64(double %b, <8 x double> %a)
  ret double %0
}

; fmul reduction of <8 x double> called with the `fast` flag and a start value
; of 1.0. The expected assembly combines v2 through v5 with vector xvmuldp
; instructions.
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v3, v5
; PWR9LE-NEXT: xvmuldp vs1, v2, v4
; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v3, v5
; PWR9BE-NEXT: xvmuldp vs1, v2, v4
; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v3, v5
; PWR10LE-NEXT: xvmuldp vs1, v2, v4
; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v3, v5
; PWR10BE-NEXT: xvmuldp vs1, v2, v4
; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
  ret double %0
}

; fmul reduction of <16 x double> with no fast-math flags and a start value of
; 1.0. The expected assembly is a chain of scalar xsmuldp instructions over
; v2 through v9.
define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsmuldp f0, f0, v5
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsmuldp f0, f0, v6
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsmuldp f0, f0, v7
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsmuldp f0, f0, v8
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsmuldp f0, f0, v6
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsmuldp f0, f0, v7
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsmuldp f0, f0, v8
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsmuldp f0, f0, v9
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsmuldp f0, f0, v5
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsmuldp f0, f0, v6
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsmuldp f0, f0, v7
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsmuldp f0, f0, v8
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsmuldp f0, f0, v6
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsmuldp f0, f0, v7
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsmuldp f0, f0, v8
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsmuldp f0, f0, v9
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
  ret double %0
}

; Same reduction as @v16f64, but the start value is the scalar argument %b, so
; the expected xsmuldp chain begins with a multiply that reads f1.
define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsmuldp f0, f0, v5
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsmuldp f0, f0, v6
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsmuldp f0, f0, v7
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsmuldp f0, f0, v8
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsmuldp f0, f0, v6
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsmuldp f0, f0, v7
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsmuldp f0, f0, v8
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsmuldp f0, f0, v9
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsmuldp f0, f0, v5
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsmuldp f0, f0, v6
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsmuldp f0, f0, v7
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsmuldp f0, f0, v8
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsmuldp f0, f0, v6
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsmuldp f0, f0, v7
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsmuldp f0, f0, v8
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsmuldp f0, f0, v9
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v16f64(double %b, <16 x double> %a)
  ret double %0
}

; fmul reduction of <16 x double> called with the `fast` flag and a start
; value of 1.0. The expected assembly combines v2 through v9 with vector
; xvmuldp instructions.
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v4, v8
; PWR9LE-NEXT: xvmuldp vs1, v2, v6
; PWR9LE-NEXT: xvmuldp vs2, v5, v9
; PWR9LE-NEXT: xvmuldp vs3, v3, v7
; PWR9LE-NEXT: xvmuldp vs2, vs3, vs2
; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v4, v8
; PWR9BE-NEXT: xvmuldp vs1, v2, v6
; PWR9BE-NEXT: xvmuldp vs2, v5, v9
; PWR9BE-NEXT: xvmuldp vs3, v3, v7
; PWR9BE-NEXT: xvmuldp vs2, vs3, vs2
; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9BE-NEXT: xvmuldp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v4, v8
; PWR10LE-NEXT: xvmuldp vs1, v2, v6
; PWR10LE-NEXT: xvmuldp vs2, v5, v9
; PWR10LE-NEXT: xvmuldp vs3, v3, v7
; PWR10LE-NEXT: xvmuldp vs2, vs3, vs2
; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v4, v8
; PWR10BE-NEXT: xvmuldp vs1, v2, v6
; PWR10BE-NEXT: xvmuldp vs2, v5, v9
; PWR10BE-NEXT: xvmuldp vs3, v3, v7
; PWR10BE-NEXT: xvmuldp vs2, vs3, vs2
; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10BE-NEXT: xvmuldp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
  ret double %0
}

; Declarations of the f64 fmul reduction intrinsics called by the f64 tests
; above, one per vector width (2, 4, 8 and 16 elements).
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) #0
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) #0
declare double @llvm.vector.reduce.fmul.v8f64(double, <8 x double>) #0
declare double @llvm.vector.reduce.fmul.v16f64(double, <16 x double>) #0

; Attribute group referenced as #0 by the definitions and declarations above.
attributes #0 = { nounwind }