1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 3; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE 4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 5; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE 6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 7; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \ 8; RUN: FileCheck %s --check-prefix=PWR10LE 9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 10; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \ 11; RUN: FileCheck %s --check-prefix=PWR10BE 12 13;; 14;; Vectors of f32 15;; 16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 { 17; PWR9LE-LABEL: v2f32: 18; PWR9LE: # %bb.0: # %entry 19; PWR9LE-NEXT: xxswapd vs0, v2 20; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 3 21; PWR9LE-NEXT: xscvspdpn f0, vs0 22; PWR9LE-NEXT: xscvspdpn f1, vs1 23; PWR9LE-NEXT: xsmindp f1, f1, f0 24; PWR9LE-NEXT: blr 25; 26; PWR9BE-LABEL: v2f32: 27; PWR9BE: # %bb.0: # %entry 28; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 29; PWR9BE-NEXT: xscvspdpn f0, v2 30; PWR9BE-NEXT: xscvspdpn f1, vs1 31; PWR9BE-NEXT: xsmindp f1, f0, f1 32; PWR9BE-NEXT: blr 33; 34; PWR10LE-LABEL: v2f32: 35; PWR10LE: # %bb.0: # %entry 36; PWR10LE-NEXT: xxswapd vs0, v2 37; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 3 38; PWR10LE-NEXT: xscvspdpn f0, vs0 39; PWR10LE-NEXT: xscvspdpn f1, vs1 40; PWR10LE-NEXT: xsmindp f1, f1, f0 41; PWR10LE-NEXT: blr 42; 43; PWR10BE-LABEL: v2f32: 44; PWR10BE: # %bb.0: # %entry 45; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 46; PWR10BE-NEXT: xscvspdpn f0, v2 47; PWR10BE-NEXT: xscvspdpn f1, vs1 48; PWR10BE-NEXT: xsmindp f1, f0, f1 49; PWR10BE-NEXT: blr 50entry: 51 %0 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) 52 ret float %0 53} 54 55define 
dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 { 56; PWR9LE-LABEL: v2f32_fast: 57; PWR9LE: # %bb.0: # %entry 58; PWR9LE-NEXT: xxspltw vs0, v2, 2 59; PWR9LE-NEXT: xvminsp vs0, v2, vs0 60; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 61; PWR9LE-NEXT: xscvspdpn f1, vs0 62; PWR9LE-NEXT: blr 63; 64; PWR9BE-LABEL: v2f32_fast: 65; PWR9BE: # %bb.0: # %entry 66; PWR9BE-NEXT: xxspltw vs0, v2, 1 67; PWR9BE-NEXT: xvminsp vs0, v2, vs0 68; PWR9BE-NEXT: xscvspdpn f1, vs0 69; PWR9BE-NEXT: blr 70; 71; PWR10LE-LABEL: v2f32_fast: 72; PWR10LE: # %bb.0: # %entry 73; PWR10LE-NEXT: xxspltw vs0, v2, 2 74; PWR10LE-NEXT: xvminsp vs0, v2, vs0 75; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 76; PWR10LE-NEXT: xscvspdpn f1, vs0 77; PWR10LE-NEXT: blr 78; 79; PWR10BE-LABEL: v2f32_fast: 80; PWR10BE: # %bb.0: # %entry 81; PWR10BE-NEXT: xxspltw vs0, v2, 1 82; PWR10BE-NEXT: xvminsp vs0, v2, vs0 83; PWR10BE-NEXT: xscvspdpn f1, vs0 84; PWR10BE-NEXT: blr 85entry: 86 %0 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) 87 ret float %0 88} 89 90define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 { 91; PWR9LE-LABEL: v4f32: 92; PWR9LE: # %bb.0: # %entry 93; PWR9LE-NEXT: xxsldwi vs2, v2, v2, 3 94; PWR9LE-NEXT: xxswapd vs3, v2 95; PWR9LE-NEXT: xscvspdpn f0, v2 96; PWR9LE-NEXT: xscvspdpn f2, vs2 97; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 98; PWR9LE-NEXT: xscvspdpn f3, vs3 99; PWR9LE-NEXT: xscvspdpn f1, vs1 100; PWR9LE-NEXT: xsmindp f2, f2, f3 101; PWR9LE-NEXT: xsmindp f1, f2, f1 102; PWR9LE-NEXT: xsmindp f1, f1, f0 103; PWR9LE-NEXT: blr 104; 105; PWR9BE-LABEL: v4f32: 106; PWR9BE: # %bb.0: # %entry 107; PWR9BE-NEXT: xxsldwi vs2, v2, v2, 1 108; PWR9BE-NEXT: xxswapd vs1, v2 109; PWR9BE-NEXT: xscvspdpn f3, v2 110; PWR9BE-NEXT: xscvspdpn f2, vs2 111; PWR9BE-NEXT: xxsldwi vs0, v2, v2, 3 112; PWR9BE-NEXT: xscvspdpn f1, vs1 113; PWR9BE-NEXT: xscvspdpn f0, vs0 114; PWR9BE-NEXT: xsmindp f2, f3, f2 115; PWR9BE-NEXT: xsmindp f1, f2, f1 116; PWR9BE-NEXT: xsmindp f1, f1, f0 117; 
PWR9BE-NEXT: blr 118; 119; PWR10LE-LABEL: v4f32: 120; PWR10LE: # %bb.0: # %entry 121; PWR10LE-NEXT: xxsldwi vs2, v2, v2, 3 122; PWR10LE-NEXT: xxswapd vs3, v2 123; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 124; PWR10LE-NEXT: xscvspdpn f0, v2 125; PWR10LE-NEXT: xscvspdpn f2, vs2 126; PWR10LE-NEXT: xscvspdpn f3, vs3 127; PWR10LE-NEXT: xscvspdpn f1, vs1 128; PWR10LE-NEXT: xsmindp f2, f2, f3 129; PWR10LE-NEXT: xsmindp f1, f2, f1 130; PWR10LE-NEXT: xsmindp f1, f1, f0 131; PWR10LE-NEXT: blr 132; 133; PWR10BE-LABEL: v4f32: 134; PWR10BE: # %bb.0: # %entry 135; PWR10BE-NEXT: xxsldwi vs2, v2, v2, 1 136; PWR10BE-NEXT: xxswapd vs1, v2 137; PWR10BE-NEXT: xscvspdpn f3, v2 138; PWR10BE-NEXT: xxsldwi vs0, v2, v2, 3 139; PWR10BE-NEXT: xscvspdpn f2, vs2 140; PWR10BE-NEXT: xscvspdpn f1, vs1 141; PWR10BE-NEXT: xscvspdpn f0, vs0 142; PWR10BE-NEXT: xsmindp f2, f3, f2 143; PWR10BE-NEXT: xsmindp f1, f2, f1 144; PWR10BE-NEXT: xsmindp f1, f1, f0 145; PWR10BE-NEXT: blr 146entry: 147 %0 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) 148 ret float %0 149} 150 151define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 { 152; PWR9LE-LABEL: v4f32_fast: 153; PWR9LE: # %bb.0: # %entry 154; PWR9LE-NEXT: xxswapd v3, v2 155; PWR9LE-NEXT: xvminsp vs0, v2, v3 156; PWR9LE-NEXT: xxspltw vs1, vs0, 2 157; PWR9LE-NEXT: xvminsp vs0, vs0, vs1 158; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 159; PWR9LE-NEXT: xscvspdpn f1, vs0 160; PWR9LE-NEXT: blr 161; 162; PWR9BE-LABEL: v4f32_fast: 163; PWR9BE: # %bb.0: # %entry 164; PWR9BE-NEXT: xxswapd v3, v2 165; PWR9BE-NEXT: xvminsp vs0, v2, v3 166; PWR9BE-NEXT: xxspltw vs1, vs0, 1 167; PWR9BE-NEXT: xvminsp vs0, vs0, vs1 168; PWR9BE-NEXT: xscvspdpn f1, vs0 169; PWR9BE-NEXT: blr 170; 171; PWR10LE-LABEL: v4f32_fast: 172; PWR10LE: # %bb.0: # %entry 173; PWR10LE-NEXT: xxswapd v3, v2 174; PWR10LE-NEXT: xvminsp vs0, v2, v3 175; PWR10LE-NEXT: xxspltw vs1, vs0, 2 176; PWR10LE-NEXT: xvminsp vs0, vs0, vs1 177; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 178; 
PWR10LE-NEXT: xscvspdpn f1, vs0 179; PWR10LE-NEXT: blr 180; 181; PWR10BE-LABEL: v4f32_fast: 182; PWR10BE: # %bb.0: # %entry 183; PWR10BE-NEXT: xxswapd v3, v2 184; PWR10BE-NEXT: xvminsp vs0, v2, v3 185; PWR10BE-NEXT: xxspltw vs1, vs0, 1 186; PWR10BE-NEXT: xvminsp vs0, vs0, vs1 187; PWR10BE-NEXT: xscvspdpn f1, vs0 188; PWR10BE-NEXT: blr 189entry: 190 %0 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) 191 ret float %0 192} 193 194define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 { 195; PWR9LE-LABEL: v8f32: 196; PWR9LE: # %bb.0: # %entry 197; PWR9LE-NEXT: xvminsp vs0, v2, v3 198; PWR9LE-NEXT: xxswapd vs1, vs0 199; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 200; PWR9LE-NEXT: xscvspdpn f1, vs1 201; PWR9LE-NEXT: xscvspdpn f2, vs2 202; PWR9LE-NEXT: xsmindp f1, f2, f1 203; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 204; PWR9LE-NEXT: xscvspdpn f0, vs0 205; PWR9LE-NEXT: xscvspdpn f2, vs2 206; PWR9LE-NEXT: xsmindp f1, f1, f2 207; PWR9LE-NEXT: xsmindp f1, f1, f0 208; PWR9LE-NEXT: blr 209; 210; PWR9BE-LABEL: v8f32: 211; PWR9BE: # %bb.0: # %entry 212; PWR9BE-NEXT: xvminsp vs0, v2, v3 213; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 214; PWR9BE-NEXT: xscvspdpn f1, vs0 215; PWR9BE-NEXT: xscvspdpn f2, vs2 216; PWR9BE-NEXT: xsmindp f1, f1, f2 217; PWR9BE-NEXT: xxswapd vs2, vs0 218; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 219; PWR9BE-NEXT: xscvspdpn f2, vs2 220; PWR9BE-NEXT: xscvspdpn f0, vs0 221; PWR9BE-NEXT: xsmindp f1, f1, f2 222; PWR9BE-NEXT: xsmindp f1, f1, f0 223; PWR9BE-NEXT: blr 224; 225; PWR10LE-LABEL: v8f32: 226; PWR10LE: # %bb.0: # %entry 227; PWR10LE-NEXT: xvminsp vs0, v2, v3 228; PWR10LE-NEXT: xxswapd vs1, vs0 229; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 230; PWR10LE-NEXT: xscvspdpn f1, vs1 231; PWR10LE-NEXT: xscvspdpn f2, vs2 232; PWR10LE-NEXT: xsmindp f1, f2, f1 233; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 234; PWR10LE-NEXT: xscvspdpn f0, vs0 235; PWR10LE-NEXT: xscvspdpn f2, vs2 236; PWR10LE-NEXT: xsmindp f1, f1, f2 237; PWR10LE-NEXT: xsmindp f1, f1, f0 
238; PWR10LE-NEXT: blr 239; 240; PWR10BE-LABEL: v8f32: 241; PWR10BE: # %bb.0: # %entry 242; PWR10BE-NEXT: xvminsp vs0, v2, v3 243; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 244; PWR10BE-NEXT: xscvspdpn f1, vs0 245; PWR10BE-NEXT: xscvspdpn f2, vs2 246; PWR10BE-NEXT: xsmindp f1, f1, f2 247; PWR10BE-NEXT: xxswapd vs2, vs0 248; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 249; PWR10BE-NEXT: xscvspdpn f2, vs2 250; PWR10BE-NEXT: xscvspdpn f0, vs0 251; PWR10BE-NEXT: xsmindp f1, f1, f2 252; PWR10BE-NEXT: xsmindp f1, f1, f0 253; PWR10BE-NEXT: blr 254entry: 255 %0 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a) 256 ret float %0 257} 258 259define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 { 260; PWR9LE-LABEL: v8f32_fast: 261; PWR9LE: # %bb.0: # %entry 262; PWR9LE-NEXT: xvminsp vs0, v2, v3 263; PWR9LE-NEXT: xxswapd v2, vs0 264; PWR9LE-NEXT: xvminsp vs0, vs0, v2 265; PWR9LE-NEXT: xxspltw vs1, vs0, 2 266; PWR9LE-NEXT: xvminsp vs0, vs0, vs1 267; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 268; PWR9LE-NEXT: xscvspdpn f1, vs0 269; PWR9LE-NEXT: blr 270; 271; PWR9BE-LABEL: v8f32_fast: 272; PWR9BE: # %bb.0: # %entry 273; PWR9BE-NEXT: xvminsp vs0, v2, v3 274; PWR9BE-NEXT: xxswapd v2, vs0 275; PWR9BE-NEXT: xvminsp vs0, vs0, v2 276; PWR9BE-NEXT: xxspltw vs1, vs0, 1 277; PWR9BE-NEXT: xvminsp vs0, vs0, vs1 278; PWR9BE-NEXT: xscvspdpn f1, vs0 279; PWR9BE-NEXT: blr 280; 281; PWR10LE-LABEL: v8f32_fast: 282; PWR10LE: # %bb.0: # %entry 283; PWR10LE-NEXT: xvminsp vs0, v2, v3 284; PWR10LE-NEXT: xxswapd v2, vs0 285; PWR10LE-NEXT: xvminsp vs0, vs0, v2 286; PWR10LE-NEXT: xxspltw vs1, vs0, 2 287; PWR10LE-NEXT: xvminsp vs0, vs0, vs1 288; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 289; PWR10LE-NEXT: xscvspdpn f1, vs0 290; PWR10LE-NEXT: blr 291; 292; PWR10BE-LABEL: v8f32_fast: 293; PWR10BE: # %bb.0: # %entry 294; PWR10BE-NEXT: xvminsp vs0, v2, v3 295; PWR10BE-NEXT: xxswapd v2, vs0 296; PWR10BE-NEXT: xvminsp vs0, vs0, v2 297; PWR10BE-NEXT: xxspltw vs1, vs0, 1 298; PWR10BE-NEXT: xvminsp 
vs0, vs0, vs1 299; PWR10BE-NEXT: xscvspdpn f1, vs0 300; PWR10BE-NEXT: blr 301entry: 302 %0 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a) 303 ret float %0 304} 305 306define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 { 307; PWR9LE-LABEL: v16f32: 308; PWR9LE: # %bb.0: # %entry 309; PWR9LE-NEXT: xvminsp vs0, v3, v5 310; PWR9LE-NEXT: xvminsp vs1, v2, v4 311; PWR9LE-NEXT: xvminsp vs0, vs1, vs0 312; PWR9LE-NEXT: xxswapd vs1, vs0 313; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 314; PWR9LE-NEXT: xscvspdpn f1, vs1 315; PWR9LE-NEXT: xscvspdpn f2, vs2 316; PWR9LE-NEXT: xsmindp f1, f2, f1 317; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 318; PWR9LE-NEXT: xscvspdpn f0, vs0 319; PWR9LE-NEXT: xscvspdpn f2, vs2 320; PWR9LE-NEXT: xsmindp f1, f1, f2 321; PWR9LE-NEXT: xsmindp f1, f1, f0 322; PWR9LE-NEXT: blr 323; 324; PWR9BE-LABEL: v16f32: 325; PWR9BE: # %bb.0: # %entry 326; PWR9BE-NEXT: xvminsp vs0, v3, v5 327; PWR9BE-NEXT: xvminsp vs1, v2, v4 328; PWR9BE-NEXT: xvminsp vs0, vs1, vs0 329; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 330; PWR9BE-NEXT: xscvspdpn f1, vs0 331; PWR9BE-NEXT: xscvspdpn f2, vs2 332; PWR9BE-NEXT: xsmindp f1, f1, f2 333; PWR9BE-NEXT: xxswapd vs2, vs0 334; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 335; PWR9BE-NEXT: xscvspdpn f2, vs2 336; PWR9BE-NEXT: xscvspdpn f0, vs0 337; PWR9BE-NEXT: xsmindp f1, f1, f2 338; PWR9BE-NEXT: xsmindp f1, f1, f0 339; PWR9BE-NEXT: blr 340; 341; PWR10LE-LABEL: v16f32: 342; PWR10LE: # %bb.0: # %entry 343; PWR10LE-NEXT: xvminsp vs0, v3, v5 344; PWR10LE-NEXT: xvminsp vs1, v2, v4 345; PWR10LE-NEXT: xvminsp vs0, vs1, vs0 346; PWR10LE-NEXT: xxswapd vs1, vs0 347; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 348; PWR10LE-NEXT: xscvspdpn f1, vs1 349; PWR10LE-NEXT: xscvspdpn f2, vs2 350; PWR10LE-NEXT: xsmindp f1, f2, f1 351; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 352; PWR10LE-NEXT: xscvspdpn f0, vs0 353; PWR10LE-NEXT: xscvspdpn f2, vs2 354; PWR10LE-NEXT: xsmindp f1, f1, f2 355; PWR10LE-NEXT: xsmindp f1, f1, f0 356; PWR10LE-NEXT: blr 
357; 358; PWR10BE-LABEL: v16f32: 359; PWR10BE: # %bb.0: # %entry 360; PWR10BE-NEXT: xvminsp vs0, v3, v5 361; PWR10BE-NEXT: xvminsp vs1, v2, v4 362; PWR10BE-NEXT: xvminsp vs0, vs1, vs0 363; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 364; PWR10BE-NEXT: xscvspdpn f1, vs0 365; PWR10BE-NEXT: xscvspdpn f2, vs2 366; PWR10BE-NEXT: xsmindp f1, f1, f2 367; PWR10BE-NEXT: xxswapd vs2, vs0 368; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 369; PWR10BE-NEXT: xscvspdpn f2, vs2 370; PWR10BE-NEXT: xscvspdpn f0, vs0 371; PWR10BE-NEXT: xsmindp f1, f1, f2 372; PWR10BE-NEXT: xsmindp f1, f1, f0 373; PWR10BE-NEXT: blr 374entry: 375 %0 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) 376 ret float %0 377} 378 379define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 { 380; PWR9LE-LABEL: v16f32_fast: 381; PWR9LE: # %bb.0: # %entry 382; PWR9LE-NEXT: xvminsp vs0, v3, v5 383; PWR9LE-NEXT: xvminsp vs1, v2, v4 384; PWR9LE-NEXT: xvminsp vs0, vs1, vs0 385; PWR9LE-NEXT: xxswapd v2, vs0 386; PWR9LE-NEXT: xvminsp vs0, vs0, v2 387; PWR9LE-NEXT: xxspltw vs1, vs0, 2 388; PWR9LE-NEXT: xvminsp vs0, vs0, vs1 389; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 390; PWR9LE-NEXT: xscvspdpn f1, vs0 391; PWR9LE-NEXT: blr 392; 393; PWR9BE-LABEL: v16f32_fast: 394; PWR9BE: # %bb.0: # %entry 395; PWR9BE-NEXT: xvminsp vs0, v3, v5 396; PWR9BE-NEXT: xvminsp vs1, v2, v4 397; PWR9BE-NEXT: xvminsp vs0, vs1, vs0 398; PWR9BE-NEXT: xxswapd v2, vs0 399; PWR9BE-NEXT: xvminsp vs0, vs0, v2 400; PWR9BE-NEXT: xxspltw vs1, vs0, 1 401; PWR9BE-NEXT: xvminsp vs0, vs0, vs1 402; PWR9BE-NEXT: xscvspdpn f1, vs0 403; PWR9BE-NEXT: blr 404; 405; PWR10LE-LABEL: v16f32_fast: 406; PWR10LE: # %bb.0: # %entry 407; PWR10LE-NEXT: xvminsp vs0, v3, v5 408; PWR10LE-NEXT: xvminsp vs1, v2, v4 409; PWR10LE-NEXT: xvminsp vs0, vs1, vs0 410; PWR10LE-NEXT: xxswapd v2, vs0 411; PWR10LE-NEXT: xvminsp vs0, vs0, v2 412; PWR10LE-NEXT: xxspltw vs1, vs0, 2 413; PWR10LE-NEXT: xvminsp vs0, vs0, vs1 414; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 415; 
PWR10LE-NEXT: xscvspdpn f1, vs0 416; PWR10LE-NEXT: blr 417; 418; PWR10BE-LABEL: v16f32_fast: 419; PWR10BE: # %bb.0: # %entry 420; PWR10BE-NEXT: xvminsp vs0, v3, v5 421; PWR10BE-NEXT: xvminsp vs1, v2, v4 422; PWR10BE-NEXT: xvminsp vs0, vs1, vs0 423; PWR10BE-NEXT: xxswapd v2, vs0 424; PWR10BE-NEXT: xvminsp vs0, vs0, v2 425; PWR10BE-NEXT: xxspltw vs1, vs0, 1 426; PWR10BE-NEXT: xvminsp vs0, vs0, vs1 427; PWR10BE-NEXT: xscvspdpn f1, vs0 428; PWR10BE-NEXT: blr 429entry: 430 %0 = call fast float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) 431 ret float %0 432} 433 434define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 { 435; PWR9LE-LABEL: v32f32: 436; PWR9LE: # %bb.0: # %entry 437; PWR9LE-NEXT: xvminsp vs0, v5, v9 438; PWR9LE-NEXT: xvminsp vs1, v3, v7 439; PWR9LE-NEXT: xvminsp vs2, v2, v6 440; PWR9LE-NEXT: xvminsp vs0, vs1, vs0 441; PWR9LE-NEXT: xvminsp vs1, v4, v8 442; PWR9LE-NEXT: xvminsp vs1, vs2, vs1 443; PWR9LE-NEXT: xvminsp vs0, vs1, vs0 444; PWR9LE-NEXT: xxswapd vs1, vs0 445; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 446; PWR9LE-NEXT: xscvspdpn f1, vs1 447; PWR9LE-NEXT: xscvspdpn f2, vs2 448; PWR9LE-NEXT: xsmindp f1, f2, f1 449; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 450; PWR9LE-NEXT: xscvspdpn f0, vs0 451; PWR9LE-NEXT: xscvspdpn f2, vs2 452; PWR9LE-NEXT: xsmindp f1, f1, f2 453; PWR9LE-NEXT: xsmindp f1, f1, f0 454; PWR9LE-NEXT: blr 455; 456; PWR9BE-LABEL: v32f32: 457; PWR9BE: # %bb.0: # %entry 458; PWR9BE-NEXT: xvminsp vs0, v5, v9 459; PWR9BE-NEXT: xvminsp vs1, v3, v7 460; PWR9BE-NEXT: xvminsp vs2, v2, v6 461; PWR9BE-NEXT: xvminsp vs0, vs1, vs0 462; PWR9BE-NEXT: xvminsp vs1, v4, v8 463; PWR9BE-NEXT: xvminsp vs1, vs2, vs1 464; PWR9BE-NEXT: xvminsp vs0, vs1, vs0 465; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 466; PWR9BE-NEXT: xscvspdpn f1, vs0 467; PWR9BE-NEXT: xscvspdpn f2, vs2 468; PWR9BE-NEXT: xsmindp f1, f1, f2 469; PWR9BE-NEXT: xxswapd vs2, vs0 470; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 471; PWR9BE-NEXT: xscvspdpn f2, vs2 472; PWR9BE-NEXT: 
xscvspdpn f0, vs0 473; PWR9BE-NEXT: xsmindp f1, f1, f2 474; PWR9BE-NEXT: xsmindp f1, f1, f0 475; PWR9BE-NEXT: blr 476; 477; PWR10LE-LABEL: v32f32: 478; PWR10LE: # %bb.0: # %entry 479; PWR10LE-NEXT: xvminsp vs0, v5, v9 480; PWR10LE-NEXT: xvminsp vs1, v3, v7 481; PWR10LE-NEXT: xvminsp vs2, v2, v6 482; PWR10LE-NEXT: xvminsp vs0, vs1, vs0 483; PWR10LE-NEXT: xvminsp vs1, v4, v8 484; PWR10LE-NEXT: xvminsp vs1, vs2, vs1 485; PWR10LE-NEXT: xvminsp vs0, vs1, vs0 486; PWR10LE-NEXT: xxswapd vs1, vs0 487; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 488; PWR10LE-NEXT: xscvspdpn f1, vs1 489; PWR10LE-NEXT: xscvspdpn f2, vs2 490; PWR10LE-NEXT: xsmindp f1, f2, f1 491; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 492; PWR10LE-NEXT: xscvspdpn f0, vs0 493; PWR10LE-NEXT: xscvspdpn f2, vs2 494; PWR10LE-NEXT: xsmindp f1, f1, f2 495; PWR10LE-NEXT: xsmindp f1, f1, f0 496; PWR10LE-NEXT: blr 497; 498; PWR10BE-LABEL: v32f32: 499; PWR10BE: # %bb.0: # %entry 500; PWR10BE-NEXT: xvminsp vs0, v5, v9 501; PWR10BE-NEXT: xvminsp vs1, v3, v7 502; PWR10BE-NEXT: xvminsp vs2, v2, v6 503; PWR10BE-NEXT: xvminsp vs0, vs1, vs0 504; PWR10BE-NEXT: xvminsp vs1, v4, v8 505; PWR10BE-NEXT: xvminsp vs1, vs2, vs1 506; PWR10BE-NEXT: xvminsp vs0, vs1, vs0 507; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 508; PWR10BE-NEXT: xscvspdpn f1, vs0 509; PWR10BE-NEXT: xscvspdpn f2, vs2 510; PWR10BE-NEXT: xsmindp f1, f1, f2 511; PWR10BE-NEXT: xxswapd vs2, vs0 512; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 513; PWR10BE-NEXT: xscvspdpn f2, vs2 514; PWR10BE-NEXT: xscvspdpn f0, vs0 515; PWR10BE-NEXT: xsmindp f1, f1, f2 516; PWR10BE-NEXT: xsmindp f1, f1, f0 517; PWR10BE-NEXT: blr 518entry: 519 %0 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a) 520 ret float %0 521} 522 523define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 { 524; PWR9LE-LABEL: v32f32_fast: 525; PWR9LE: # %bb.0: # %entry 526; PWR9LE-NEXT: xvminsp vs0, v4, v8 527; PWR9LE-NEXT: xvminsp vs1, v2, v6 528; PWR9LE-NEXT: xvminsp vs2, v5, v9 529; PWR9LE-NEXT: 
xvminsp vs3, v3, v7 530; PWR9LE-NEXT: xvminsp vs2, vs3, vs2 531; PWR9LE-NEXT: xvminsp vs0, vs1, vs0 532; PWR9LE-NEXT: xvminsp vs0, vs0, vs2 533; PWR9LE-NEXT: xxswapd v2, vs0 534; PWR9LE-NEXT: xvminsp vs0, vs0, v2 535; PWR9LE-NEXT: xxspltw vs1, vs0, 2 536; PWR9LE-NEXT: xvminsp vs0, vs0, vs1 537; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 538; PWR9LE-NEXT: xscvspdpn f1, vs0 539; PWR9LE-NEXT: blr 540; 541; PWR9BE-LABEL: v32f32_fast: 542; PWR9BE: # %bb.0: # %entry 543; PWR9BE-NEXT: xvminsp vs0, v4, v8 544; PWR9BE-NEXT: xvminsp vs1, v2, v6 545; PWR9BE-NEXT: xvminsp vs2, v5, v9 546; PWR9BE-NEXT: xvminsp vs3, v3, v7 547; PWR9BE-NEXT: xvminsp vs2, vs3, vs2 548; PWR9BE-NEXT: xvminsp vs0, vs1, vs0 549; PWR9BE-NEXT: xvminsp vs0, vs0, vs2 550; PWR9BE-NEXT: xxswapd v2, vs0 551; PWR9BE-NEXT: xvminsp vs0, vs0, v2 552; PWR9BE-NEXT: xxspltw vs1, vs0, 1 553; PWR9BE-NEXT: xvminsp vs0, vs0, vs1 554; PWR9BE-NEXT: xscvspdpn f1, vs0 555; PWR9BE-NEXT: blr 556; 557; PWR10LE-LABEL: v32f32_fast: 558; PWR10LE: # %bb.0: # %entry 559; PWR10LE-NEXT: xvminsp vs0, v4, v8 560; PWR10LE-NEXT: xvminsp vs1, v2, v6 561; PWR10LE-NEXT: xvminsp vs2, v5, v9 562; PWR10LE-NEXT: xvminsp vs3, v3, v7 563; PWR10LE-NEXT: xvminsp vs2, vs3, vs2 564; PWR10LE-NEXT: xvminsp vs0, vs1, vs0 565; PWR10LE-NEXT: xvminsp vs0, vs0, vs2 566; PWR10LE-NEXT: xxswapd v2, vs0 567; PWR10LE-NEXT: xvminsp vs0, vs0, v2 568; PWR10LE-NEXT: xxspltw vs1, vs0, 2 569; PWR10LE-NEXT: xvminsp vs0, vs0, vs1 570; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 571; PWR10LE-NEXT: xscvspdpn f1, vs0 572; PWR10LE-NEXT: blr 573; 574; PWR10BE-LABEL: v32f32_fast: 575; PWR10BE: # %bb.0: # %entry 576; PWR10BE-NEXT: xvminsp vs0, v4, v8 577; PWR10BE-NEXT: xvminsp vs1, v2, v6 578; PWR10BE-NEXT: xvminsp vs2, v5, v9 579; PWR10BE-NEXT: xvminsp vs3, v3, v7 580; PWR10BE-NEXT: xvminsp vs2, vs3, vs2 581; PWR10BE-NEXT: xvminsp vs0, vs1, vs0 582; PWR10BE-NEXT: xvminsp vs0, vs0, vs2 583; PWR10BE-NEXT: xxswapd v2, vs0 584; PWR10BE-NEXT: xvminsp vs0, vs0, v2 585; PWR10BE-NEXT: xxspltw 
vs1, vs0, 1 586; PWR10BE-NEXT: xvminsp vs0, vs0, vs1 587; PWR10BE-NEXT: xscvspdpn f1, vs0 588; PWR10BE-NEXT: blr 589entry: 590 %0 = call fast float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a) 591 ret float %0 592} 593 594declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) #0 595declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) #0 596declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) #0 597declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) #0 598declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>) #0 599 600;; 601;; Vectors of f64 602;; 603define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 { 604; PWR9LE-LABEL: v2f64: 605; PWR9LE: # %bb.0: # %entry 606; PWR9LE-NEXT: xxswapd vs0, v2 607; PWR9LE-NEXT: xsmindp f1, f0, v2 608; PWR9LE-NEXT: blr 609; 610; PWR9BE-LABEL: v2f64: 611; PWR9BE: # %bb.0: # %entry 612; PWR9BE-NEXT: xxswapd vs0, v2 613; PWR9BE-NEXT: xsmindp f1, v2, f0 614; PWR9BE-NEXT: blr 615; 616; PWR10LE-LABEL: v2f64: 617; PWR10LE: # %bb.0: # %entry 618; PWR10LE-NEXT: xxswapd vs0, v2 619; PWR10LE-NEXT: xsmindp f1, f0, v2 620; PWR10LE-NEXT: blr 621; 622; PWR10BE-LABEL: v2f64: 623; PWR10BE: # %bb.0: # %entry 624; PWR10BE-NEXT: xxswapd vs0, v2 625; PWR10BE-NEXT: xsmindp f1, v2, f0 626; PWR10BE-NEXT: blr 627entry: 628 %0 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) 629 ret double %0 630} 631 632define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 { 633; PWR9LE-LABEL: v2f64_fast: 634; PWR9LE: # %bb.0: # %entry 635; PWR9LE-NEXT: xxswapd vs0, v2 636; PWR9LE-NEXT: xvmindp vs0, v2, vs0 637; PWR9LE-NEXT: xxswapd vs1, vs0 638; PWR9LE-NEXT: blr 639; 640; PWR9BE-LABEL: v2f64_fast: 641; PWR9BE: # %bb.0: # %entry 642; PWR9BE-NEXT: xxswapd vs0, v2 643; PWR9BE-NEXT: xvmindp vs1, v2, vs0 644; PWR9BE-NEXT: blr 645; 646; PWR10LE-LABEL: v2f64_fast: 647; PWR10LE: # %bb.0: # %entry 648; PWR10LE-NEXT: xxswapd vs0, v2 649; PWR10LE-NEXT: xvmindp vs0, v2, vs0 650; PWR10LE-NEXT: xxswapd vs1, 
vs0 651; PWR10LE-NEXT: blr 652; 653; PWR10BE-LABEL: v2f64_fast: 654; PWR10BE: # %bb.0: # %entry 655; PWR10BE-NEXT: xxswapd vs0, v2 656; PWR10BE-NEXT: xvmindp vs1, v2, vs0 657; PWR10BE-NEXT: blr 658entry: 659 %0 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) 660 ret double %0 661} 662 663define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 { 664; PWR9LE-LABEL: v4f64: 665; PWR9LE: # %bb.0: # %entry 666; PWR9LE-NEXT: xvmindp vs0, v2, v3 667; PWR9LE-NEXT: xxswapd vs1, vs0 668; PWR9LE-NEXT: xsmindp f1, f1, f0 669; PWR9LE-NEXT: blr 670; 671; PWR9BE-LABEL: v4f64: 672; PWR9BE: # %bb.0: # %entry 673; PWR9BE-NEXT: xvmindp vs0, v2, v3 674; PWR9BE-NEXT: xxswapd vs1, vs0 675; PWR9BE-NEXT: xsmindp f1, f0, f1 676; PWR9BE-NEXT: blr 677; 678; PWR10LE-LABEL: v4f64: 679; PWR10LE: # %bb.0: # %entry 680; PWR10LE-NEXT: xvmindp vs0, v2, v3 681; PWR10LE-NEXT: xxswapd vs1, vs0 682; PWR10LE-NEXT: xsmindp f1, f1, f0 683; PWR10LE-NEXT: blr 684; 685; PWR10BE-LABEL: v4f64: 686; PWR10BE: # %bb.0: # %entry 687; PWR10BE-NEXT: xvmindp vs0, v2, v3 688; PWR10BE-NEXT: xxswapd vs1, vs0 689; PWR10BE-NEXT: xsmindp f1, f0, f1 690; PWR10BE-NEXT: blr 691entry: 692 %0 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a) 693 ret double %0 694} 695 696define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 { 697; PWR9LE-LABEL: v4f64_fast: 698; PWR9LE: # %bb.0: # %entry 699; PWR9LE-NEXT: xvmindp vs0, v2, v3 700; PWR9LE-NEXT: xxswapd vs1, vs0 701; PWR9LE-NEXT: xvmindp vs0, vs0, vs1 702; PWR9LE-NEXT: xxswapd vs1, vs0 703; PWR9LE-NEXT: blr 704; 705; PWR9BE-LABEL: v4f64_fast: 706; PWR9BE: # %bb.0: # %entry 707; PWR9BE-NEXT: xvmindp vs0, v2, v3 708; PWR9BE-NEXT: xxswapd vs1, vs0 709; PWR9BE-NEXT: xvmindp vs1, vs0, vs1 710; PWR9BE-NEXT: blr 711; 712; PWR10LE-LABEL: v4f64_fast: 713; PWR10LE: # %bb.0: # %entry 714; PWR10LE-NEXT: xvmindp vs0, v2, v3 715; PWR10LE-NEXT: xxswapd vs1, vs0 716; PWR10LE-NEXT: xvmindp vs0, vs0, vs1 717; PWR10LE-NEXT: xxswapd vs1, 
vs0 718; PWR10LE-NEXT: blr 719; 720; PWR10BE-LABEL: v4f64_fast: 721; PWR10BE: # %bb.0: # %entry 722; PWR10BE-NEXT: xvmindp vs0, v2, v3 723; PWR10BE-NEXT: xxswapd vs1, vs0 724; PWR10BE-NEXT: xvmindp vs1, vs0, vs1 725; PWR10BE-NEXT: blr 726entry: 727 %0 = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a) 728 ret double %0 729} 730 731define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 { 732; PWR9LE-LABEL: v8f64: 733; PWR9LE: # %bb.0: # %entry 734; PWR9LE-NEXT: xvmindp vs0, v3, v5 735; PWR9LE-NEXT: xvmindp vs1, v2, v4 736; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 737; PWR9LE-NEXT: xxswapd vs1, vs0 738; PWR9LE-NEXT: xsmindp f1, f1, f0 739; PWR9LE-NEXT: blr 740; 741; PWR9BE-LABEL: v8f64: 742; PWR9BE: # %bb.0: # %entry 743; PWR9BE-NEXT: xvmindp vs0, v3, v5 744; PWR9BE-NEXT: xvmindp vs1, v2, v4 745; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 746; PWR9BE-NEXT: xxswapd vs1, vs0 747; PWR9BE-NEXT: xsmindp f1, f0, f1 748; PWR9BE-NEXT: blr 749; 750; PWR10LE-LABEL: v8f64: 751; PWR10LE: # %bb.0: # %entry 752; PWR10LE-NEXT: xvmindp vs0, v3, v5 753; PWR10LE-NEXT: xvmindp vs1, v2, v4 754; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 755; PWR10LE-NEXT: xxswapd vs1, vs0 756; PWR10LE-NEXT: xsmindp f1, f1, f0 757; PWR10LE-NEXT: blr 758; 759; PWR10BE-LABEL: v8f64: 760; PWR10BE: # %bb.0: # %entry 761; PWR10BE-NEXT: xvmindp vs0, v3, v5 762; PWR10BE-NEXT: xvmindp vs1, v2, v4 763; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 764; PWR10BE-NEXT: xxswapd vs1, vs0 765; PWR10BE-NEXT: xsmindp f1, f0, f1 766; PWR10BE-NEXT: blr 767entry: 768 %0 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a) 769 ret double %0 770} 771 772define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 { 773; PWR9LE-LABEL: v8f64_fast: 774; PWR9LE: # %bb.0: # %entry 775; PWR9LE-NEXT: xvmindp vs0, v3, v5 776; PWR9LE-NEXT: xvmindp vs1, v2, v4 777; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 778; PWR9LE-NEXT: xxswapd vs1, vs0 779; PWR9LE-NEXT: xvmindp vs0, vs0, vs1 780; PWR9LE-NEXT: xxswapd vs1, vs0 781; 
PWR9LE-NEXT: blr 782; 783; PWR9BE-LABEL: v8f64_fast: 784; PWR9BE: # %bb.0: # %entry 785; PWR9BE-NEXT: xvmindp vs0, v3, v5 786; PWR9BE-NEXT: xvmindp vs1, v2, v4 787; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 788; PWR9BE-NEXT: xxswapd vs1, vs0 789; PWR9BE-NEXT: xvmindp vs1, vs0, vs1 790; PWR9BE-NEXT: blr 791; 792; PWR10LE-LABEL: v8f64_fast: 793; PWR10LE: # %bb.0: # %entry 794; PWR10LE-NEXT: xvmindp vs0, v3, v5 795; PWR10LE-NEXT: xvmindp vs1, v2, v4 796; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 797; PWR10LE-NEXT: xxswapd vs1, vs0 798; PWR10LE-NEXT: xvmindp vs0, vs0, vs1 799; PWR10LE-NEXT: xxswapd vs1, vs0 800; PWR10LE-NEXT: blr 801; 802; PWR10BE-LABEL: v8f64_fast: 803; PWR10BE: # %bb.0: # %entry 804; PWR10BE-NEXT: xvmindp vs0, v3, v5 805; PWR10BE-NEXT: xvmindp vs1, v2, v4 806; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 807; PWR10BE-NEXT: xxswapd vs1, vs0 808; PWR10BE-NEXT: xvmindp vs1, vs0, vs1 809; PWR10BE-NEXT: blr 810entry: 811 %0 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a) 812 ret double %0 813} 814 815define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 { 816; PWR9LE-LABEL: v16f64: 817; PWR9LE: # %bb.0: # %entry 818; PWR9LE-NEXT: xvmindp vs0, v5, v9 819; PWR9LE-NEXT: xvmindp vs1, v3, v7 820; PWR9LE-NEXT: xvmindp vs2, v2, v6 821; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 822; PWR9LE-NEXT: xvmindp vs1, v4, v8 823; PWR9LE-NEXT: xvmindp vs1, vs2, vs1 824; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 825; PWR9LE-NEXT: xxswapd vs1, vs0 826; PWR9LE-NEXT: xsmindp f1, f1, f0 827; PWR9LE-NEXT: blr 828; 829; PWR9BE-LABEL: v16f64: 830; PWR9BE: # %bb.0: # %entry 831; PWR9BE-NEXT: xvmindp vs0, v5, v9 832; PWR9BE-NEXT: xvmindp vs1, v3, v7 833; PWR9BE-NEXT: xvmindp vs2, v2, v6 834; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 835; PWR9BE-NEXT: xvmindp vs1, v4, v8 836; PWR9BE-NEXT: xvmindp vs1, vs2, vs1 837; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 838; PWR9BE-NEXT: xxswapd vs1, vs0 839; PWR9BE-NEXT: xsmindp f1, f0, f1 840; PWR9BE-NEXT: blr 841; 842; PWR10LE-LABEL: v16f64: 843; PWR10LE: # 
%bb.0: # %entry 844; PWR10LE-NEXT: xvmindp vs0, v5, v9 845; PWR10LE-NEXT: xvmindp vs1, v3, v7 846; PWR10LE-NEXT: xvmindp vs2, v2, v6 847; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 848; PWR10LE-NEXT: xvmindp vs1, v4, v8 849; PWR10LE-NEXT: xvmindp vs1, vs2, vs1 850; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 851; PWR10LE-NEXT: xxswapd vs1, vs0 852; PWR10LE-NEXT: xsmindp f1, f1, f0 853; PWR10LE-NEXT: blr 854; 855; PWR10BE-LABEL: v16f64: 856; PWR10BE: # %bb.0: # %entry 857; PWR10BE-NEXT: xvmindp vs0, v5, v9 858; PWR10BE-NEXT: xvmindp vs1, v3, v7 859; PWR10BE-NEXT: xvmindp vs2, v2, v6 860; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 861; PWR10BE-NEXT: xvmindp vs1, v4, v8 862; PWR10BE-NEXT: xvmindp vs1, vs2, vs1 863; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 864; PWR10BE-NEXT: xxswapd vs1, vs0 865; PWR10BE-NEXT: xsmindp f1, f0, f1 866; PWR10BE-NEXT: blr 867entry: 868 %0 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a) 869 ret double %0 870} 871 872define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 { 873; PWR9LE-LABEL: v16f64_fast: 874; PWR9LE: # %bb.0: # %entry 875; PWR9LE-NEXT: xvmindp vs0, v4, v8 876; PWR9LE-NEXT: xvmindp vs1, v2, v6 877; PWR9LE-NEXT: xvmindp vs2, v5, v9 878; PWR9LE-NEXT: xvmindp vs3, v3, v7 879; PWR9LE-NEXT: xvmindp vs2, vs3, vs2 880; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 881; PWR9LE-NEXT: xvmindp vs0, vs0, vs2 882; PWR9LE-NEXT: xxswapd vs1, vs0 883; PWR9LE-NEXT: xvmindp vs0, vs0, vs1 884; PWR9LE-NEXT: xxswapd vs1, vs0 885; PWR9LE-NEXT: blr 886; 887; PWR9BE-LABEL: v16f64_fast: 888; PWR9BE: # %bb.0: # %entry 889; PWR9BE-NEXT: xvmindp vs0, v4, v8 890; PWR9BE-NEXT: xvmindp vs1, v2, v6 891; PWR9BE-NEXT: xvmindp vs2, v5, v9 892; PWR9BE-NEXT: xvmindp vs3, v3, v7 893; PWR9BE-NEXT: xvmindp vs2, vs3, vs2 894; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 895; PWR9BE-NEXT: xvmindp vs0, vs0, vs2 896; PWR9BE-NEXT: xxswapd vs1, vs0 897; PWR9BE-NEXT: xvmindp vs1, vs0, vs1 898; PWR9BE-NEXT: blr 899; 900; PWR10LE-LABEL: v16f64_fast: 901; PWR10LE: # %bb.0: # %entry 
902; PWR10LE-NEXT: xvmindp vs0, v4, v8 903; PWR10LE-NEXT: xvmindp vs1, v2, v6 904; PWR10LE-NEXT: xvmindp vs2, v5, v9 905; PWR10LE-NEXT: xvmindp vs3, v3, v7 906; PWR10LE-NEXT: xvmindp vs2, vs3, vs2 907; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 908; PWR10LE-NEXT: xvmindp vs0, vs0, vs2 909; PWR10LE-NEXT: xxswapd vs1, vs0 910; PWR10LE-NEXT: xvmindp vs0, vs0, vs1 911; PWR10LE-NEXT: xxswapd vs1, vs0 912; PWR10LE-NEXT: blr 913; 914; PWR10BE-LABEL: v16f64_fast: 915; PWR10BE: # %bb.0: # %entry 916; PWR10BE-NEXT: xvmindp vs0, v4, v8 917; PWR10BE-NEXT: xvmindp vs1, v2, v6 918; PWR10BE-NEXT: xvmindp vs2, v5, v9 919; PWR10BE-NEXT: xvmindp vs3, v3, v7 920; PWR10BE-NEXT: xvmindp vs2, vs3, vs2 921; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 922; PWR10BE-NEXT: xvmindp vs0, vs0, vs2 923; PWR10BE-NEXT: xxswapd vs1, vs0 924; PWR10BE-NEXT: xvmindp vs1, vs0, vs1 925; PWR10BE-NEXT: blr 926entry: 927 %0 = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a) 928 ret double %0 929} 930 931define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 { 932; PWR9LE-LABEL: v32f64: 933; PWR9LE: # %bb.0: # %entry 934; PWR9LE-NEXT: lxv vs3, 272(r1) 935; PWR9LE-NEXT: lxv vs2, 240(r1) 936; PWR9LE-NEXT: xvmindp vs4, v5, v13 937; PWR9LE-NEXT: lxv vs1, 256(r1) 938; PWR9LE-NEXT: lxv vs0, 224(r1) 939; PWR9LE-NEXT: xvmindp vs3, v9, vs3 940; PWR9LE-NEXT: xvmindp vs2, v7, vs2 941; PWR9LE-NEXT: xvmindp vs1, v8, vs1 942; PWR9LE-NEXT: xvmindp vs0, v6, vs0 943; PWR9LE-NEXT: xvmindp vs3, vs4, vs3 944; PWR9LE-NEXT: xvmindp vs4, v3, v11 945; PWR9LE-NEXT: xvmindp vs2, vs4, vs2 946; PWR9LE-NEXT: xvmindp vs2, vs2, vs3 947; PWR9LE-NEXT: xvmindp vs3, v4, v12 948; PWR9LE-NEXT: xvmindp vs1, vs3, vs1 949; PWR9LE-NEXT: xvmindp vs3, v2, v10 950; PWR9LE-NEXT: xvmindp vs0, vs3, vs0 951; PWR9LE-NEXT: xvmindp vs0, vs0, vs1 952; PWR9LE-NEXT: xvmindp vs0, vs0, vs2 953; PWR9LE-NEXT: xxswapd vs1, vs0 954; PWR9LE-NEXT: xsmindp f1, f1, f0 955; PWR9LE-NEXT: blr 956; 957; PWR9BE-LABEL: v32f64: 958; PWR9BE: # %bb.0: # 
%entry 959; PWR9BE-NEXT: lxv vs3, 288(r1) 960; PWR9BE-NEXT: lxv vs2, 256(r1) 961; PWR9BE-NEXT: xvmindp vs4, v5, v13 962; PWR9BE-NEXT: lxv vs1, 272(r1) 963; PWR9BE-NEXT: lxv vs0, 240(r1) 964; PWR9BE-NEXT: xvmindp vs3, v9, vs3 965; PWR9BE-NEXT: xvmindp vs2, v7, vs2 966; PWR9BE-NEXT: xvmindp vs1, v8, vs1 967; PWR9BE-NEXT: xvmindp vs0, v6, vs0 968; PWR9BE-NEXT: xvmindp vs3, vs4, vs3 969; PWR9BE-NEXT: xvmindp vs4, v3, v11 970; PWR9BE-NEXT: xvmindp vs2, vs4, vs2 971; PWR9BE-NEXT: xvmindp vs2, vs2, vs3 972; PWR9BE-NEXT: xvmindp vs3, v4, v12 973; PWR9BE-NEXT: xvmindp vs1, vs3, vs1 974; PWR9BE-NEXT: xvmindp vs3, v2, v10 975; PWR9BE-NEXT: xvmindp vs0, vs3, vs0 976; PWR9BE-NEXT: xvmindp vs0, vs0, vs1 977; PWR9BE-NEXT: xvmindp vs0, vs0, vs2 978; PWR9BE-NEXT: xxswapd vs1, vs0 979; PWR9BE-NEXT: xsmindp f1, f0, f1 980; PWR9BE-NEXT: blr 981; 982; PWR10LE-LABEL: v32f64: 983; PWR10LE: # %bb.0: # %entry 984; PWR10LE-NEXT: lxv vs3, 272(r1) 985; PWR10LE-NEXT: lxv vs2, 240(r1) 986; PWR10LE-NEXT: xvmindp vs4, v5, v13 987; PWR10LE-NEXT: xvmindp vs3, v9, vs3 988; PWR10LE-NEXT: lxv vs1, 256(r1) 989; PWR10LE-NEXT: xvmindp vs2, v7, vs2 990; PWR10LE-NEXT: lxv vs0, 224(r1) 991; PWR10LE-NEXT: xvmindp vs1, v8, vs1 992; PWR10LE-NEXT: xvmindp vs0, v6, vs0 993; PWR10LE-NEXT: xvmindp vs3, vs4, vs3 994; PWR10LE-NEXT: xvmindp vs4, v3, v11 995; PWR10LE-NEXT: xvmindp vs2, vs4, vs2 996; PWR10LE-NEXT: xvmindp vs2, vs2, vs3 997; PWR10LE-NEXT: xvmindp vs3, v4, v12 998; PWR10LE-NEXT: xvmindp vs1, vs3, vs1 999; PWR10LE-NEXT: xvmindp vs3, v2, v10 1000; PWR10LE-NEXT: xvmindp vs0, vs3, vs0 1001; PWR10LE-NEXT: xvmindp vs0, vs0, vs1 1002; PWR10LE-NEXT: xvmindp vs0, vs0, vs2 1003; PWR10LE-NEXT: xxswapd vs1, vs0 1004; PWR10LE-NEXT: xsmindp f1, f1, f0 1005; PWR10LE-NEXT: blr 1006; 1007; PWR10BE-LABEL: v32f64: 1008; PWR10BE: # %bb.0: # %entry 1009; PWR10BE-NEXT: lxv vs3, 288(r1) 1010; PWR10BE-NEXT: lxv vs2, 256(r1) 1011; PWR10BE-NEXT: xvmindp vs4, v5, v13 1012; PWR10BE-NEXT: xvmindp vs3, v9, vs3 1013; PWR10BE-NEXT: lxv 
vs1, 272(r1) 1014; PWR10BE-NEXT: xvmindp vs2, v7, vs2 1015; PWR10BE-NEXT: lxv vs0, 240(r1) 1016; PWR10BE-NEXT: xvmindp vs1, v8, vs1 1017; PWR10BE-NEXT: xvmindp vs0, v6, vs0 1018; PWR10BE-NEXT: xvmindp vs3, vs4, vs3 1019; PWR10BE-NEXT: xvmindp vs4, v3, v11 1020; PWR10BE-NEXT: xvmindp vs2, vs4, vs2 1021; PWR10BE-NEXT: xvmindp vs2, vs2, vs3 1022; PWR10BE-NEXT: xvmindp vs3, v4, v12 1023; PWR10BE-NEXT: xvmindp vs1, vs3, vs1 1024; PWR10BE-NEXT: xvmindp vs3, v2, v10 1025; PWR10BE-NEXT: xvmindp vs0, vs3, vs0 1026; PWR10BE-NEXT: xvmindp vs0, vs0, vs1 1027; PWR10BE-NEXT: xvmindp vs0, vs0, vs2 1028; PWR10BE-NEXT: xxswapd vs1, vs0 1029; PWR10BE-NEXT: xsmindp f1, f0, f1 1030; PWR10BE-NEXT: blr 1031entry: 1032 %0 = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a) 1033 ret double %0 1034} 1035 ;; Fast-math counterpart of @v32f64 - the reduce.fmin call in the body carries the `fast` flag, and the final min in the expected code is a vector xvmindp rather than the scalar xsmindp expected for @v32f64. The assertions are autogenerated (see the NOTE at the top of this file), so regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 1036define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 { 1037; PWR9LE-LABEL: v32f64_fast: 1038; PWR9LE: # %bb.0: # %entry 1039; PWR9LE-NEXT: lxv vs0, 256(r1) 1040; PWR9LE-NEXT: lxv vs1, 224(r1) 1041; PWR9LE-NEXT: lxv vs2, 272(r1) 1042; PWR9LE-NEXT: lxv vs3, 240(r1) 1043; PWR9LE-NEXT: xvmindp vs4, v3, v11 1044; PWR9LE-NEXT: xvmindp vs5, v5, v13 1045; PWR9LE-NEXT: xvmindp vs6, v2, v10 1046; PWR9LE-NEXT: xvmindp vs7, v4, v12 1047; PWR9LE-NEXT: xvmindp vs3, v7, vs3 1048; PWR9LE-NEXT: xvmindp vs2, v9, vs2 1049; PWR9LE-NEXT: xvmindp vs1, v6, vs1 1050; PWR9LE-NEXT: xvmindp vs0, v8, vs0 1051; PWR9LE-NEXT: xvmindp vs0, vs7, vs0 1052; PWR9LE-NEXT: xvmindp vs1, vs6, vs1 1053; PWR9LE-NEXT: xvmindp vs2, vs5, vs2 1054; PWR9LE-NEXT: xvmindp vs3, vs4, vs3 1055; PWR9LE-NEXT: xvmindp vs2, vs3, vs2 1056; PWR9LE-NEXT: xvmindp vs0, vs1, vs0 1057; PWR9LE-NEXT: xvmindp vs0, vs0, vs2 1058; PWR9LE-NEXT: xxswapd vs1, vs0 1059; PWR9LE-NEXT: xvmindp vs0, vs0, vs1 1060; PWR9LE-NEXT: xxswapd vs1, vs0 1061; PWR9LE-NEXT: blr 1062; 1063; PWR9BE-LABEL: v32f64_fast: 1064; PWR9BE: # %bb.0: # %entry 1065; PWR9BE-NEXT: lxv vs0, 272(r1) 1066; PWR9BE-NEXT: lxv vs1, 240(r1) 1067; PWR9BE-NEXT: 
lxv vs2, 288(r1) 1068; PWR9BE-NEXT: lxv vs3, 256(r1) 1069; PWR9BE-NEXT: xvmindp vs4, v3, v11 1070; PWR9BE-NEXT: xvmindp vs5, v5, v13 1071; PWR9BE-NEXT: xvmindp vs6, v2, v10 1072; PWR9BE-NEXT: xvmindp vs7, v4, v12 1073; PWR9BE-NEXT: xvmindp vs3, v7, vs3 1074; PWR9BE-NEXT: xvmindp vs2, v9, vs2 1075; PWR9BE-NEXT: xvmindp vs1, v6, vs1 1076; PWR9BE-NEXT: xvmindp vs0, v8, vs0 1077; PWR9BE-NEXT: xvmindp vs0, vs7, vs0 1078; PWR9BE-NEXT: xvmindp vs1, vs6, vs1 1079; PWR9BE-NEXT: xvmindp vs2, vs5, vs2 1080; PWR9BE-NEXT: xvmindp vs3, vs4, vs3 1081; PWR9BE-NEXT: xvmindp vs2, vs3, vs2 1082; PWR9BE-NEXT: xvmindp vs0, vs1, vs0 1083; PWR9BE-NEXT: xvmindp vs0, vs0, vs2 1084; PWR9BE-NEXT: xxswapd vs1, vs0 1085; PWR9BE-NEXT: xvmindp vs1, vs0, vs1 1086; PWR9BE-NEXT: blr 1087; 1088; PWR10LE-LABEL: v32f64_fast: 1089; PWR10LE: # %bb.0: # %entry 1090; PWR10LE-NEXT: lxv vs0, 256(r1) 1091; PWR10LE-NEXT: lxv vs1, 224(r1) 1092; PWR10LE-NEXT: xvmindp vs4, v3, v11 1093; PWR10LE-NEXT: xvmindp vs5, v5, v13 1094; PWR10LE-NEXT: xvmindp vs6, v2, v10 1095; PWR10LE-NEXT: xvmindp vs7, v4, v12 1096; PWR10LE-NEXT: xvmindp vs1, v6, vs1 1097; PWR10LE-NEXT: lxv vs2, 272(r1) 1098; PWR10LE-NEXT: lxv vs3, 240(r1) 1099; PWR10LE-NEXT: xvmindp vs3, v7, vs3 1100; PWR10LE-NEXT: xvmindp vs2, v9, vs2 1101; PWR10LE-NEXT: xvmindp vs0, v8, vs0 1102; PWR10LE-NEXT: xvmindp vs0, vs7, vs0 1103; PWR10LE-NEXT: xvmindp vs1, vs6, vs1 1104; PWR10LE-NEXT: xvmindp vs2, vs5, vs2 1105; PWR10LE-NEXT: xvmindp vs3, vs4, vs3 1106; PWR10LE-NEXT: xvmindp vs2, vs3, vs2 1107; PWR10LE-NEXT: xvmindp vs0, vs1, vs0 1108; PWR10LE-NEXT: xvmindp vs0, vs0, vs2 1109; PWR10LE-NEXT: xxswapd vs1, vs0 1110; PWR10LE-NEXT: xvmindp vs0, vs0, vs1 1111; PWR10LE-NEXT: xxswapd vs1, vs0 1112; PWR10LE-NEXT: blr 1113; 1114; PWR10BE-LABEL: v32f64_fast: 1115; PWR10BE: # %bb.0: # %entry 1116; PWR10BE-NEXT: lxv vs0, 272(r1) 1117; PWR10BE-NEXT: lxv vs1, 240(r1) 1118; PWR10BE-NEXT: xvmindp vs4, v3, v11 1119; PWR10BE-NEXT: xvmindp vs5, v5, v13 1120; PWR10BE-NEXT: xvmindp 
vs6, v2, v10 1121; PWR10BE-NEXT: xvmindp vs7, v4, v12 1122; PWR10BE-NEXT: xvmindp vs1, v6, vs1 1123; PWR10BE-NEXT: lxv vs2, 288(r1) 1124; PWR10BE-NEXT: lxv vs3, 256(r1) 1125; PWR10BE-NEXT: xvmindp vs3, v7, vs3 1126; PWR10BE-NEXT: xvmindp vs2, v9, vs2 1127; PWR10BE-NEXT: xvmindp vs0, v8, vs0 1128; PWR10BE-NEXT: xvmindp vs0, vs7, vs0 1129; PWR10BE-NEXT: xvmindp vs1, vs6, vs1 1130; PWR10BE-NEXT: xvmindp vs2, vs5, vs2 1131; PWR10BE-NEXT: xvmindp vs3, vs4, vs3 1132; PWR10BE-NEXT: xvmindp vs2, vs3, vs2 1133; PWR10BE-NEXT: xvmindp vs0, vs1, vs0 1134; PWR10BE-NEXT: xvmindp vs0, vs0, vs2 1135; PWR10BE-NEXT: xxswapd vs1, vs0 1136; PWR10BE-NEXT: xvmindp vs1, vs0, vs1 1137; PWR10BE-NEXT: blr 1138entry: 1139 %0 = call fast double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a) 1140 ret double %0 1141} 1142 ;; Declarations of the llvm.vector.reduce.fmin intrinsics for the f64 vector widths (2, 4, 8, 16 and 32 elements). 1143declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) #0 1144declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) #0 1145declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) #0 1146declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) #0 1147declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) #0 1148 1149attributes #0 = { nounwind } 1150