; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE

;; Verifies the PowerPC assembly emitted for the llvm.vector.reduce.or
;; intrinsic on vectors of 2, 4, 8 and 16 elements of type i32 and i64.
;; The four llc invocations above cover -mcpu=pwr9 and -mcpu=pwr10 on both the
;; powerpc64le and powerpc64 triples, each with its own FileCheck prefix.
;; Every function below simply passes its vector argument to the intrinsic and
;; returns the scalar result; the assertion blocks are regenerated with
;; utils/update_llc_test_checks.py and should not be edited by hand.

;;
;; Vectors of type i32
;;

;; OR-reduce a <2 x i32> argument to a single i32.
define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw vs0, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxlor v2, v2, vs0
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw vs0, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxlor v2, v2, vs0
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw vs0, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxlor v2, v2, vs0
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw vs0, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxlor v2, v2, vs0
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
  ret i32 %0
}

;; OR-reduce a <4 x i32> argument to a single i32.
define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxlor vs0, v2, v3
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xxlor v2, vs0, vs1
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxlor vs0, v2, v3
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xxlor v2, vs0, vs1
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxlor vs0, v2, v3
; PWR10LE-NEXT:    xxspltw vs0, vs0, 2
; PWR10LE-NEXT:    xxeval v2, v2, v3, vs0, 127
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxlor vs0, v2, v3
; PWR10BE-NEXT:    xxspltw vs0, vs0, 1
; PWR10BE-NEXT:    xxeval v2, v2, v3, vs0, 127
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
  ret i32 %0
}

;; OR-reduce an <8 x i32> argument to a single i32.
define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlor vs0, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, v2
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xxlor v2, vs0, vs1
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlor vs0, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, v2
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xxlor v2, vs0, vs1
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlor vs0, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v4, vs0
; PWR10LE-NEXT:    xxeval vs1, v2, v3, v4, 127
; PWR10LE-NEXT:    xxspltw vs1, vs1, 2
; PWR10LE-NEXT:    xxeval v2, vs0, v4, vs1, 127
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlor vs0, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v4, vs0
; PWR10BE-NEXT:    xxeval vs1, v2, v3, v4, 127
; PWR10BE-NEXT:    xxspltw vs1, vs1, 1
; PWR10BE-NEXT:    xxeval v2, vs0, v4, vs1, 127
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a)
  ret i32 %0
}

;; OR-reduce a <16 x i32> argument to a single i32.
define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlor vs0, v3, v5
; PWR9LE-NEXT:    xxlor vs1, v2, v4
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, v2
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xxlor v2, vs0, vs1
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlor vs0, v3, v5
; PWR9BE-NEXT:    xxlor vs1, v2, v4
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, v2
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xxlor v2, vs0, vs1
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlor vs1, v2, v4
; PWR10LE-NEXT:    xxlor vs0, v3, v5
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxeval vs2, vs1, v3, v5, 127
; PWR10LE-NEXT:    xxswapd v2, vs2
; PWR10LE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
; PWR10LE-NEXT:    xxspltw vs0, vs0, 2
; PWR10LE-NEXT:    xxeval v2, vs2, v2, vs0, 127
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlor vs1, v2, v4
; PWR10BE-NEXT:    xxlor vs0, v3, v5
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxeval vs2, vs1, v3, v5, 127
; PWR10BE-NEXT:    xxswapd v2, vs2
; PWR10BE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
; PWR10BE-NEXT:    xxspltw vs0, vs0, 1
; PWR10BE-NEXT:    xxeval v2, vs2, v2, vs0, 127
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a)
  ret i32 %0
}

declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) #0

;;
;; Vectors of type i64
;;

;; OR-reduce a <2 x i64> argument to a single i64.
define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    xxlor vs0, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    xxlor vs0, v2, v3
; PWR9BE-NEXT:    mffprd r3, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    xxlor vs0, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    xxlor vs0, v2, v3
; PWR10BE-NEXT:    mffprd r3, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
  ret i64 %0
}

;; OR-reduce a <4 x i64> argument to a single i64.
define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlor vs0, v2, v3
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, v2
; PWR9LE-NEXT:    mfvsrld r3, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlor vs0, v2, v3
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, v2
; PWR9BE-NEXT:    mffprd r3, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlor vs0, v2, v3
; PWR10LE-NEXT:    xxswapd v4, vs0
; PWR10LE-NEXT:    xxeval vs0, v2, v3, v4, 127
; PWR10LE-NEXT:    mfvsrld r3, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlor vs0, v2, v3
; PWR10BE-NEXT:    xxswapd v4, vs0
; PWR10BE-NEXT:    xxeval vs0, v2, v3, v4, 127
; PWR10BE-NEXT:    mffprd r3, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a)
  ret i64 %0
}

;; OR-reduce an <8 x i64> argument to a single i64.
define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlor vs0, v3, v5
; PWR9LE-NEXT:    xxlor vs1, v2, v4
; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, v2
; PWR9LE-NEXT:    mfvsrld r3, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlor vs0, v3, v5
; PWR9BE-NEXT:    xxlor vs1, v2, v4
; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, v2
; PWR9BE-NEXT:    mffprd r3, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlor vs1, v2, v4
; PWR10LE-NEXT:    xxlor vs0, v3, v5
; PWR10LE-NEXT:    xxeval vs2, vs1, v3, v5, 127
; PWR10LE-NEXT:    xxswapd v2, vs2
; PWR10LE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
; PWR10LE-NEXT:    mfvsrld r3, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlor vs1, v2, v4
; PWR10BE-NEXT:    xxlor vs0, v3, v5
; PWR10BE-NEXT:    xxeval vs2, vs1, v3, v5, 127
; PWR10BE-NEXT:    xxswapd v2, vs2
; PWR10BE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
; PWR10BE-NEXT:    mffprd r3, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)
  ret i64 %0
}

;; OR-reduce a <16 x i64> argument to a single i64.
define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlor vs0, v4, v8
; PWR9LE-NEXT:    xxlor vs1, v2, v6
; PWR9LE-NEXT:    xxlor vs2, v5, v9
; PWR9LE-NEXT:    xxlor vs3, v3, v7
; PWR9LE-NEXT:    xxlor vs2, vs3, vs2
; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, vs2
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xxlor vs0, vs0, v2
; PWR9LE-NEXT:    mfvsrld r3, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlor vs0, v4, v8
; PWR9BE-NEXT:    xxlor vs1, v2, v6
; PWR9BE-NEXT:    xxlor vs2, v5, v9
; PWR9BE-NEXT:    xxlor vs3, v3, v7
; PWR9BE-NEXT:    xxlor vs2, vs3, vs2
; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, vs2
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xxlor vs0, vs0, v2
; PWR9BE-NEXT:    mffprd r3, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlor vs1, v2, v6
; PWR10LE-NEXT:    xxlor vs0, v5, v9
; PWR10LE-NEXT:    xxlor vs2, v3, v7
; PWR10LE-NEXT:    xxeval vs1, vs1, v4, v8, 127
; PWR10LE-NEXT:    xxeval vs3, vs2, v5, v9, 127
; PWR10LE-NEXT:    xxeval vs0, vs1, vs2, vs0, 127
; PWR10LE-NEXT:    xxswapd v2, vs0
; PWR10LE-NEXT:    xxeval vs0, vs1, vs3, v2, 127
; PWR10LE-NEXT:    mfvsrld r3, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlor vs1, v2, v6
; PWR10BE-NEXT:    xxlor vs0, v5, v9
; PWR10BE-NEXT:    xxlor vs2, v3, v7
; PWR10BE-NEXT:    xxeval vs1, vs1, v4, v8, 127
; PWR10BE-NEXT:    xxeval vs3, vs2, v5, v9, 127
; PWR10BE-NEXT:    xxeval vs0, vs1, vs2, vs0, 127
; PWR10BE-NEXT:    xxswapd v2, vs0
; PWR10BE-NEXT:    xxeval vs0, vs1, vs3, v2, 127
; PWR10BE-NEXT:    mffprd r3, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a)
  ret i64 %0
}

declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) #0
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) #0
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>) #0
declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>) #0


attributes #0 = { nounwind }