1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 3; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE 4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 5; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE 6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 7; RUN: -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE 8; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 9; RUN: -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE 10 11;; 12;; Vectors of i8 13;; 14define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 { 15; PWR9LE-LABEL: v2i8: 16; PWR9LE: # %bb.0: # %entry 17; PWR9LE-NEXT: vspltb v3, v2, 14 18; PWR9LE-NEXT: li r3, 0 19; PWR9LE-NEXT: vaddubm v2, v2, v3 20; PWR9LE-NEXT: vextubrx r3, r3, v2 21; PWR9LE-NEXT: blr 22; 23; PWR9BE-LABEL: v2i8: 24; PWR9BE: # %bb.0: # %entry 25; PWR9BE-NEXT: vspltb v3, v2, 1 26; PWR9BE-NEXT: li r3, 0 27; PWR9BE-NEXT: vaddubm v2, v2, v3 28; PWR9BE-NEXT: vextublx r3, r3, v2 29; PWR9BE-NEXT: blr 30; 31; PWR10LE-LABEL: v2i8: 32; PWR10LE: # %bb.0: # %entry 33; PWR10LE-NEXT: vspltb v3, v2, 14 34; PWR10LE-NEXT: li r3, 0 35; PWR10LE-NEXT: vaddubm v2, v2, v3 36; PWR10LE-NEXT: vextubrx r3, r3, v2 37; PWR10LE-NEXT: blr 38; 39; PWR10BE-LABEL: v2i8: 40; PWR10BE: # %bb.0: # %entry 41; PWR10BE-NEXT: vspltb v3, v2, 1 42; PWR10BE-NEXT: li r3, 0 43; PWR10BE-NEXT: vaddubm v2, v2, v3 44; PWR10BE-NEXT: vextublx r3, r3, v2 45; PWR10BE-NEXT: blr 46entry: 47 %0 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a) 48 ret i8 %0 49} 50 51define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 { 52; PWR9LE-LABEL: v4i8: 53; PWR9LE: # %bb.0: # %entry 54; PWR9LE-NEXT: vsplth v3, v2, 6 55; PWR9LE-NEXT: li r3, 0 56; PWR9LE-NEXT: vaddubm v2, v2, v3 57; PWR9LE-NEXT: vspltb v3, v2, 14 58; PWR9LE-NEXT: vaddubm v2, v2, v3 59; PWR9LE-NEXT: vextubrx r3, r3, v2 60; PWR9LE-NEXT: blr 61; 62; PWR9BE-LABEL: v4i8: 63; PWR9BE: # %bb.0: # %entry 64; PWR9BE-NEXT: vsplth v3, v2, 1 65; PWR9BE-NEXT: li r3, 0 66; PWR9BE-NEXT: vaddubm v2, v2, v3 67; PWR9BE-NEXT: vspltb v3, v2, 1 68; PWR9BE-NEXT: vaddubm v2, v2, v3 69; PWR9BE-NEXT: vextublx r3, r3, v2 70; PWR9BE-NEXT: blr 71; 72; PWR10LE-LABEL: v4i8: 73; PWR10LE: # %bb.0: # %entry 74; PWR10LE-NEXT: vsplth v3, v2, 6 75; PWR10LE-NEXT: li r3, 0 76; PWR10LE-NEXT: vaddubm v2, v2, v3 77; PWR10LE-NEXT: vspltb v3, v2, 14 78; PWR10LE-NEXT: vaddubm v2, v2, v3 79; PWR10LE-NEXT: vextubrx r3, r3, v2 80; PWR10LE-NEXT: blr 81; 82; PWR10BE-LABEL: v4i8: 83; PWR10BE: # %bb.0: # %entry 84; PWR10BE-NEXT: vsplth v3, v2, 1 85; PWR10BE-NEXT: li r3, 0 86; PWR10BE-NEXT: vaddubm v2, v2, v3 87; PWR10BE-NEXT: vspltb v3, v2, 1 88; PWR10BE-NEXT: vaddubm v2, v2, v3 89; PWR10BE-NEXT: vextublx r3, r3, v2 90; PWR10BE-NEXT: blr 91entry: 92 %0 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a) 93 ret i8 %0 94} 95 96define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 { 97; PWR9LE-LABEL: v8i8: 98; PWR9LE: # %bb.0: # %entry 99; PWR9LE-NEXT: xxspltw v3, v2, 2 100; PWR9LE-NEXT: li r3, 0 101; PWR9LE-NEXT: vaddubm v2, v2, v3 102; PWR9LE-NEXT: vsplth v3, v2, 6 103; PWR9LE-NEXT: vaddubm v2, v2, v3 104; PWR9LE-NEXT: vspltb v3, v2, 14 105; PWR9LE-NEXT: vaddubm v2, v2, v3 106; PWR9LE-NEXT: vextubrx r3, r3, v2 107; PWR9LE-NEXT: blr 108; 109; PWR9BE-LABEL: v8i8: 110; PWR9BE: # %bb.0: # %entry 111; PWR9BE-NEXT: xxspltw v3, v2, 1 112; PWR9BE-NEXT: li r3, 0 113; PWR9BE-NEXT: vaddubm v2, v2, v3 114; PWR9BE-NEXT: vsplth v3, v2, 1 115; PWR9BE-NEXT: vaddubm v2, v2, v3 116; PWR9BE-NEXT: vspltb v3, v2, 1 117; PWR9BE-NEXT: vaddubm v2, v2, v3 118; PWR9BE-NEXT: vextublx r3, r3, v2 119; PWR9BE-NEXT: blr 120; 121; PWR10LE-LABEL: v8i8: 122; PWR10LE: # %bb.0: # %entry 123; PWR10LE-NEXT: xxspltw v3, v2, 2 124; PWR10LE-NEXT: li r3, 0 125; PWR10LE-NEXT: vaddubm v2, v2, v3 126; PWR10LE-NEXT: vsplth v3, v2, 6 127; PWR10LE-NEXT: vaddubm v2, v2, v3 128; PWR10LE-NEXT: vspltb v3, v2, 14 129; PWR10LE-NEXT: vaddubm v2, v2, v3 130; PWR10LE-NEXT: vextubrx r3, r3, v2 131; PWR10LE-NEXT: blr 132; 133; PWR10BE-LABEL: v8i8: 134; PWR10BE: # %bb.0: # %entry 135; PWR10BE-NEXT: xxspltw v3, v2, 1 136; PWR10BE-NEXT: li r3, 0 137; PWR10BE-NEXT: vaddubm v2, v2, v3 138; PWR10BE-NEXT: vsplth v3, v2, 1 139; PWR10BE-NEXT: vaddubm v2, v2, v3 140; PWR10BE-NEXT: vspltb v3, v2, 1 141; PWR10BE-NEXT: vaddubm v2, v2, v3 142; PWR10BE-NEXT: vextublx r3, r3, v2 143; PWR10BE-NEXT: blr 144entry: 145 %0 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a) 146 ret i8 %0 147} 148 149define dso_local signext i8 @v16i8_sign(<16 x i8> %a) local_unnamed_addr #0 { 150; PWR9LE-LABEL: v16i8_sign: 151; PWR9LE: # %bb.0: # %entry 152; PWR9LE-NEXT: xxswapd v3, v2 153; PWR9LE-NEXT: li r3, 0 154; PWR9LE-NEXT: vaddubm v2, v2, v3 155; PWR9LE-NEXT: xxspltw v3, v2, 2 156; PWR9LE-NEXT: vaddubm v2, v2, v3 157; PWR9LE-NEXT: vsplth v3, v2, 6 158; PWR9LE-NEXT: vaddubm v2, v2, v3 159; PWR9LE-NEXT: vspltb v3, v2, 14 160; PWR9LE-NEXT: vaddubm v2, v2, v3 161; PWR9LE-NEXT: vextubrx r3, r3, v2 162; PWR9LE-NEXT: extsb r3, r3 163; PWR9LE-NEXT: blr 164; 165; PWR9BE-LABEL: v16i8_sign: 166; PWR9BE: # %bb.0: # %entry 167; PWR9BE-NEXT: xxswapd v3, v2 168; PWR9BE-NEXT: li r3, 0 169; PWR9BE-NEXT: vaddubm v2, v2, v3 170; PWR9BE-NEXT: xxspltw v3, v2, 1 171; PWR9BE-NEXT: vaddubm v2, v2, v3 172; PWR9BE-NEXT: vsplth v3, v2, 1 173; PWR9BE-NEXT: vaddubm v2, v2, v3 174; PWR9BE-NEXT: vspltb v3, v2, 1 175; PWR9BE-NEXT: vaddubm v2, v2, v3 176; PWR9BE-NEXT: vextublx r3, r3, v2 177; PWR9BE-NEXT: extsb r3, r3 178; PWR9BE-NEXT: blr 179; 180; PWR10LE-LABEL: v16i8_sign: 181; PWR10LE: # %bb.0: # %entry 182; PWR10LE-NEXT: xxswapd v3, v2 183; PWR10LE-NEXT: li r3, 0 184; PWR10LE-NEXT: vaddubm v2, v2, v3 185; PWR10LE-NEXT: xxspltw v3, v2, 2 186; PWR10LE-NEXT: vaddubm v2, v2, v3 187; PWR10LE-NEXT: vsplth v3, v2, 6 188; PWR10LE-NEXT: vaddubm v2, v2, v3 189; PWR10LE-NEXT: vspltb v3, v2, 14 190; PWR10LE-NEXT: vaddubm v2, v2, v3 191; PWR10LE-NEXT: vextubrx r3, r3, v2 192; PWR10LE-NEXT: extsb r3, r3 193; PWR10LE-NEXT: blr 194; 195; PWR10BE-LABEL: v16i8_sign: 196; PWR10BE: # %bb.0: # %entry 197; PWR10BE-NEXT: xxswapd v3, v2 198; PWR10BE-NEXT: li r3, 0 199; PWR10BE-NEXT: vaddubm v2, v2, v3 200; PWR10BE-NEXT: xxspltw v3, v2, 1 201; PWR10BE-NEXT: vaddubm v2, v2, v3 202; PWR10BE-NEXT: vsplth v3, v2, 1 203; PWR10BE-NEXT: vaddubm v2, v2, v3 204; PWR10BE-NEXT: vspltb v3, v2, 1 205; PWR10BE-NEXT: vaddubm v2, v2, v3 206; PWR10BE-NEXT: vextublx r3, r3, v2 207; PWR10BE-NEXT: extsb r3, r3 208; PWR10BE-NEXT: blr 209entry: 210 %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a) 211 ret i8 %0 212} 213 214define dso_local zeroext i8 @v16i8_zero(<16 x i8> %a) local_unnamed_addr #0 { 215; PWR9LE-LABEL: v16i8_zero: 216; PWR9LE: # %bb.0: # %entry 217; PWR9LE-NEXT: xxswapd v3, v2 218; PWR9LE-NEXT: li r3, 0 219; PWR9LE-NEXT: vaddubm v2, v2, v3 220; PWR9LE-NEXT: xxspltw v3, v2, 2 221; PWR9LE-NEXT: vaddubm v2, v2, v3 222; PWR9LE-NEXT: vsplth v3, v2, 6 223; PWR9LE-NEXT: vaddubm v2, v2, v3 224; PWR9LE-NEXT: vspltb v3, v2, 14 225; PWR9LE-NEXT: vaddubm v2, v2, v3 226; PWR9LE-NEXT: vextubrx r3, r3, v2 227; PWR9LE-NEXT: clrldi r3, r3, 56 228; PWR9LE-NEXT: blr 229; 230; PWR9BE-LABEL: v16i8_zero: 231; PWR9BE: # %bb.0: # %entry 232; PWR9BE-NEXT: xxswapd v3, v2 233; PWR9BE-NEXT: li r3, 0 234; PWR9BE-NEXT: vaddubm v2, v2, v3 235; PWR9BE-NEXT: xxspltw v3, v2, 1 236; PWR9BE-NEXT: vaddubm v2, v2, v3 237; PWR9BE-NEXT: vsplth v3, v2, 1 238; PWR9BE-NEXT: vaddubm v2, v2, v3 239; PWR9BE-NEXT: vspltb v3, v2, 1 240; PWR9BE-NEXT: vaddubm v2, v2, v3 241; PWR9BE-NEXT: vextublx r3, r3, v2 242; PWR9BE-NEXT: clrldi r3, r3, 56 243; PWR9BE-NEXT: blr 244; 245; PWR10LE-LABEL: v16i8_zero: 246; PWR10LE: # %bb.0: # %entry 247; PWR10LE-NEXT: xxswapd v3, v2 248; PWR10LE-NEXT: li r3, 0 249; PWR10LE-NEXT: vaddubm v2, v2, v3 250; PWR10LE-NEXT: xxspltw v3, v2, 2 251; PWR10LE-NEXT: vaddubm v2, v2, v3 252; PWR10LE-NEXT: vsplth v3, v2, 6 253; PWR10LE-NEXT: vaddubm v2, v2, v3 254; PWR10LE-NEXT: vspltb v3, v2, 14 255; PWR10LE-NEXT: vaddubm v2, v2, v3 256; PWR10LE-NEXT: vextubrx r3, r3, v2 257; PWR10LE-NEXT: clrldi r3, r3, 56 258; PWR10LE-NEXT: blr 259; 260; PWR10BE-LABEL: v16i8_zero: 261; PWR10BE: # %bb.0: # %entry 262; PWR10BE-NEXT: xxswapd v3, v2 263; PWR10BE-NEXT: li r3, 0 264; PWR10BE-NEXT: vaddubm v2, v2, v3 265; PWR10BE-NEXT: xxspltw v3, v2, 1 266; PWR10BE-NEXT: vaddubm v2, v2, v3 267; PWR10BE-NEXT: vsplth v3, v2, 1 268; PWR10BE-NEXT: vaddubm v2, v2, v3 269; PWR10BE-NEXT: vspltb v3, v2, 1 270; PWR10BE-NEXT: vaddubm v2, v2, v3 271; PWR10BE-NEXT: vextublx r3, r3, v2 272; PWR10BE-NEXT: clrldi r3, r3, 56 273; PWR10BE-NEXT: blr 274entry: 275 %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a) 276 ret i8 %0 277} 278 279define dso_local i8 @v32i8(<32 x i8> %a) local_unnamed_addr #0 { 280; PWR9LE-LABEL: v32i8: 281; PWR9LE: # %bb.0: # %entry 282; PWR9LE-NEXT: vaddubm v2, v2, v3 283; PWR9LE-NEXT: li r3, 0 284; PWR9LE-NEXT: xxswapd v3, v2 285; PWR9LE-NEXT: vaddubm v2, v2, v3 286; PWR9LE-NEXT: xxspltw v3, v2, 2 287; PWR9LE-NEXT: vaddubm v2, v2, v3 288; PWR9LE-NEXT: vsplth v3, v2, 6 289; PWR9LE-NEXT: vaddubm v2, v2, v3 290; PWR9LE-NEXT: vspltb v3, v2, 14 291; PWR9LE-NEXT: vaddubm v2, v2, v3 292; PWR9LE-NEXT: vextubrx r3, r3, v2 293; PWR9LE-NEXT: blr 294; 295; PWR9BE-LABEL: v32i8: 296; PWR9BE: # %bb.0: # %entry 297; PWR9BE-NEXT: vaddubm v2, v2, v3 298; PWR9BE-NEXT: li r3, 0 299; PWR9BE-NEXT: xxswapd v3, v2 300; PWR9BE-NEXT: vaddubm v2, v2, v3 301; PWR9BE-NEXT: xxspltw v3, v2, 1 302; PWR9BE-NEXT: vaddubm v2, v2, v3 303; PWR9BE-NEXT: vsplth v3, v2, 1 304; PWR9BE-NEXT: vaddubm v2, v2, v3 305; PWR9BE-NEXT: vspltb v3, v2, 1 306; PWR9BE-NEXT: vaddubm v2, v2, v3 307; PWR9BE-NEXT: vextublx r3, r3, v2 308; PWR9BE-NEXT: blr 309; 310; PWR10LE-LABEL: v32i8: 311; PWR10LE: # %bb.0: # %entry 312; PWR10LE-NEXT: vaddubm v2, v2, v3 313; PWR10LE-NEXT: li r3, 0 314; PWR10LE-NEXT: xxswapd v3, v2 315; PWR10LE-NEXT: vaddubm v2, v2, v3 316; PWR10LE-NEXT: xxspltw v3, v2, 2 317; PWR10LE-NEXT: vaddubm v2, v2, v3 318; PWR10LE-NEXT: vsplth v3, v2, 6 319; PWR10LE-NEXT: vaddubm v2, v2, v3 320; PWR10LE-NEXT: vspltb v3, v2, 14 321; PWR10LE-NEXT: vaddubm v2, v2, v3 322; PWR10LE-NEXT: vextubrx r3, r3, v2 323; PWR10LE-NEXT: blr 324; 325; PWR10BE-LABEL: v32i8: 326; PWR10BE: # %bb.0: # %entry 327; PWR10BE-NEXT: vaddubm v2, v2, v3 328; PWR10BE-NEXT: li r3, 0 329; PWR10BE-NEXT: xxswapd v3, v2 330; PWR10BE-NEXT: vaddubm v2, v2, v3 331; PWR10BE-NEXT: xxspltw v3, v2, 1 332; PWR10BE-NEXT: vaddubm v2, v2, v3 333; PWR10BE-NEXT: vsplth v3, v2, 1 334; PWR10BE-NEXT: vaddubm v2, v2, v3 335; PWR10BE-NEXT: vspltb v3, v2, 1 336; PWR10BE-NEXT: vaddubm v2, v2, v3 337; PWR10BE-NEXT: vextublx r3, r3, v2 338; PWR10BE-NEXT: blr 339entry: 340 %0 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a) 341 ret i8 %0 342} 343 344declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) #0 345declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) #0 346declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) #0 347declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #0 348declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) #0 349 350;; 351;; Vectors of i16 352;; 353define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 { 354; PWR9LE-LABEL: v2i16: 355; PWR9LE: # %bb.0: # %entry 356; PWR9LE-NEXT: vsplth v3, v2, 6 357; PWR9LE-NEXT: li r3, 0 358; PWR9LE-NEXT: vadduhm v2, v2, v3 359; PWR9LE-NEXT: vextuhrx r3, r3, v2 360; PWR9LE-NEXT: blr 361; 362; PWR9BE-LABEL: v2i16: 363; PWR9BE: # %bb.0: # %entry 364; PWR9BE-NEXT: vsplth v3, v2, 1 365; PWR9BE-NEXT: li r3, 0 366; PWR9BE-NEXT: vadduhm v2, v2, v3 367; PWR9BE-NEXT: vextuhlx r3, r3, v2 368; PWR9BE-NEXT: blr 369; 370; PWR10LE-LABEL: v2i16: 371; PWR10LE: # %bb.0: # %entry 372; PWR10LE-NEXT: vsplth v3, v2, 6 373; PWR10LE-NEXT: li r3, 0 374; PWR10LE-NEXT: vadduhm v2, v2, v3 375; PWR10LE-NEXT: vextuhrx r3, r3, v2 376; PWR10LE-NEXT: blr 377; 378; PWR10BE-LABEL: v2i16: 379; PWR10BE: # %bb.0: # %entry 380; PWR10BE-NEXT: vsplth v3, v2, 1 381; PWR10BE-NEXT: li r3, 0 382; PWR10BE-NEXT: vadduhm v2, v2, v3 383; PWR10BE-NEXT: vextuhlx r3, r3, v2 384; PWR10BE-NEXT: blr 385entry: 386 %0 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a) 387 ret i16 %0 388} 389 390define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 { 391; PWR9LE-LABEL: v4i16: 392; PWR9LE: # %bb.0: # %entry 393; PWR9LE-NEXT: xxspltw v3, v2, 2 394; PWR9LE-NEXT: li r3, 0 395; PWR9LE-NEXT: vadduhm v2, v2, v3 396; PWR9LE-NEXT: vsplth v3, v2, 6 397; PWR9LE-NEXT: vadduhm v2, v2, v3 398; PWR9LE-NEXT: vextuhrx r3, r3, v2 399; PWR9LE-NEXT: blr 400; 401; PWR9BE-LABEL: v4i16: 402; PWR9BE: # %bb.0: # %entry 403; PWR9BE-NEXT: xxspltw v3, v2, 1 404; PWR9BE-NEXT: li r3, 0 405; PWR9BE-NEXT: vadduhm v2, v2, v3 406; PWR9BE-NEXT: vsplth v3, v2, 1 407; PWR9BE-NEXT: vadduhm v2, v2, v3 408; PWR9BE-NEXT: vextuhlx r3, r3, v2 409; PWR9BE-NEXT: blr 410; 411; PWR10LE-LABEL: v4i16: 412; PWR10LE: # %bb.0: # %entry 413; PWR10LE-NEXT: xxspltw v3, v2, 2 414; PWR10LE-NEXT: li r3, 0 415; PWR10LE-NEXT: vadduhm v2, v2, v3 416; PWR10LE-NEXT: vsplth v3, v2, 6 417; PWR10LE-NEXT: vadduhm v2, v2, v3 418; PWR10LE-NEXT: vextuhrx r3, r3, v2 419; PWR10LE-NEXT: blr 420; 421; PWR10BE-LABEL: v4i16: 422; PWR10BE: # %bb.0: # %entry 423; PWR10BE-NEXT: xxspltw v3, v2, 1 424; PWR10BE-NEXT: li r3, 0 425; PWR10BE-NEXT: vadduhm v2, v2, v3 426; PWR10BE-NEXT: vsplth v3, v2, 1 427; PWR10BE-NEXT: vadduhm v2, v2, v3 428; PWR10BE-NEXT: vextuhlx r3, r3, v2 429; PWR10BE-NEXT: blr 430entry: 431 %0 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a) 432 ret i16 %0 433} 434 435define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 { 436; PWR9LE-LABEL: v8i16: 437; PWR9LE: # %bb.0: # %entry 438; PWR9LE-NEXT: xxswapd v3, v2 439; PWR9LE-NEXT: li r3, 0 440; PWR9LE-NEXT: vadduhm v2, v2, v3 441; PWR9LE-NEXT: xxspltw v3, v2, 2 442; PWR9LE-NEXT: vadduhm v2, v2, v3 443; PWR9LE-NEXT: vsplth v3, v2, 6 444; PWR9LE-NEXT: vadduhm v2, v2, v3 445; PWR9LE-NEXT: vextuhrx r3, r3, v2 446; PWR9LE-NEXT: blr 447; 448; PWR9BE-LABEL: v8i16: 449; PWR9BE: # %bb.0: # %entry 450; PWR9BE-NEXT: xxswapd v3, v2 451; PWR9BE-NEXT: li r3, 0 452; PWR9BE-NEXT: vadduhm v2, v2, v3 453; PWR9BE-NEXT: xxspltw v3, v2, 1 454; PWR9BE-NEXT: vadduhm v2, v2, v3 455; PWR9BE-NEXT: vsplth v3, v2, 1 456; PWR9BE-NEXT: vadduhm v2, v2, v3 457; PWR9BE-NEXT: vextuhlx r3, r3, v2 458; PWR9BE-NEXT: blr 459; 460; PWR10LE-LABEL: v8i16: 461; PWR10LE: # %bb.0: # %entry 462; PWR10LE-NEXT: xxswapd v3, v2 463; PWR10LE-NEXT: li r3, 0 464; PWR10LE-NEXT: vadduhm v2, v2, v3 465; PWR10LE-NEXT: xxspltw v3, v2, 2 466; PWR10LE-NEXT: vadduhm v2, v2, v3 467; PWR10LE-NEXT: vsplth v3, v2, 6 468; PWR10LE-NEXT: vadduhm v2, v2, v3 469; PWR10LE-NEXT: vextuhrx r3, r3, v2 470; PWR10LE-NEXT: blr 471; 472; PWR10BE-LABEL: v8i16: 473; PWR10BE: # %bb.0: # %entry 474; PWR10BE-NEXT: xxswapd v3, v2 475; PWR10BE-NEXT: li r3, 0 476; PWR10BE-NEXT: vadduhm v2, v2, v3 477; PWR10BE-NEXT: xxspltw v3, v2, 1 478; PWR10BE-NEXT: vadduhm v2, v2, v3 479; PWR10BE-NEXT: vsplth v3, v2, 1 480; PWR10BE-NEXT: vadduhm v2, v2, v3 481; PWR10BE-NEXT: vextuhlx r3, r3, v2 482; PWR10BE-NEXT: blr 483entry: 484 %0 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a) 485 ret i16 %0 486} 487 488define dso_local zeroext i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 { 489; PWR9LE-LABEL: v16i16: 490; PWR9LE: # %bb.0: # %entry 491; PWR9LE-NEXT: vadduhm v2, v2, v3 492; PWR9LE-NEXT: li r3, 0 493; PWR9LE-NEXT: xxswapd v3, v2 494; PWR9LE-NEXT: vadduhm v2, v2, v3 495; PWR9LE-NEXT: xxspltw v3, v2, 2 496; PWR9LE-NEXT: vadduhm v2, v2, v3 497; PWR9LE-NEXT: vsplth v3, v2, 6 498; PWR9LE-NEXT: vadduhm v2, v2, v3 499; PWR9LE-NEXT: vextuhrx r3, r3, v2 500; PWR9LE-NEXT: clrldi r3, r3, 48 501; PWR9LE-NEXT: blr 502; 503; PWR9BE-LABEL: v16i16: 504; PWR9BE: # %bb.0: # %entry 505; PWR9BE-NEXT: vadduhm v2, v2, v3 506; PWR9BE-NEXT: li r3, 0 507; PWR9BE-NEXT: xxswapd v3, v2 508; PWR9BE-NEXT: vadduhm v2, v2, v3 509; PWR9BE-NEXT: xxspltw v3, v2, 1 510; PWR9BE-NEXT: vadduhm v2, v2, v3 511; PWR9BE-NEXT: vsplth v3, v2, 1 512; PWR9BE-NEXT: vadduhm v2, v2, v3 513; PWR9BE-NEXT: vextuhlx r3, r3, v2 514; PWR9BE-NEXT: clrldi r3, r3, 48 515; PWR9BE-NEXT: blr 516; 517; PWR10LE-LABEL: v16i16: 518; PWR10LE: # %bb.0: # %entry 519; PWR10LE-NEXT: vadduhm v2, v2, v3 520; PWR10LE-NEXT: li r3, 0 521; PWR10LE-NEXT: xxswapd v3, v2 522; PWR10LE-NEXT: vadduhm v2, v2, v3 523; PWR10LE-NEXT: xxspltw v3, v2, 2 524; PWR10LE-NEXT: vadduhm v2, v2, v3 525; PWR10LE-NEXT: vsplth v3, v2, 6 526; PWR10LE-NEXT: vadduhm v2, v2, v3 527; PWR10LE-NEXT: vextuhrx r3, r3, v2 528; PWR10LE-NEXT: clrldi r3, r3, 48 529; PWR10LE-NEXT: blr 530; 531; PWR10BE-LABEL: v16i16: 532; PWR10BE: # %bb.0: # %entry 533; PWR10BE-NEXT: vadduhm v2, v2, v3 534; PWR10BE-NEXT: li r3, 0 535; PWR10BE-NEXT: xxswapd v3, v2 536; PWR10BE-NEXT: vadduhm v2, v2, v3 537; PWR10BE-NEXT: xxspltw v3, v2, 1 538; PWR10BE-NEXT: vadduhm v2, v2, v3 539; PWR10BE-NEXT: vsplth v3, v2, 1 540; PWR10BE-NEXT: vadduhm v2, v2, v3 541; PWR10BE-NEXT: vextuhlx r3, r3, v2 542; PWR10BE-NEXT: clrldi r3, r3, 48 543; PWR10BE-NEXT: blr 544entry: 545 %0 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a) 546 ret i16 %0 547} 548 549define dso_local signext i16 @v16i8tov16i16_sign(<16 x i8> %a) local_unnamed_addr #0 { 550; PWR9LE-LABEL: v16i8tov16i16_sign: 551; PWR9LE: # %bb.0: # %entry 552; PWR9LE-NEXT: vmrghb v3, v2, v2 553; PWR9LE-NEXT: vspltish v4, 8 554; PWR9LE-NEXT: li r3, 0 555; PWR9LE-NEXT: vmrglb v2, v2, v2 556; PWR9LE-NEXT: vslh v3, v3, v4 557; PWR9LE-NEXT: vslh v2, v2, v4 558; PWR9LE-NEXT: vsrah v3, v3, v4 559; PWR9LE-NEXT: vsrah v2, v2, v4 560; PWR9LE-NEXT: vadduhm v2, v2, v3 561; PWR9LE-NEXT: xxswapd v3, v2 562; PWR9LE-NEXT: vadduhm v2, v2, v3 563; PWR9LE-NEXT: xxspltw v3, v2, 2 564; PWR9LE-NEXT: vadduhm v2, v2, v3 565; PWR9LE-NEXT: vsplth v3, v2, 6 566; PWR9LE-NEXT: vadduhm v2, v2, v3 567; PWR9LE-NEXT: vextuhrx r3, r3, v2 568; PWR9LE-NEXT: extsh r3, r3 569; PWR9LE-NEXT: blr 570; 571; PWR9BE-LABEL: v16i8tov16i16_sign: 572; PWR9BE: # %bb.0: # %entry 573; PWR9BE-NEXT: vmrglb v3, v2, v2 574; PWR9BE-NEXT: vspltish v4, 8 575; PWR9BE-NEXT: li r3, 0 576; PWR9BE-NEXT: vmrghb v2, v2, v2 577; PWR9BE-NEXT: vslh v3, v3, v4 578; PWR9BE-NEXT: vslh v2, v2, v4 579; PWR9BE-NEXT: vsrah v3, v3, v4 580; PWR9BE-NEXT: vsrah v2, v2, v4 581; PWR9BE-NEXT: vadduhm v2, v2, v3 582; PWR9BE-NEXT: xxswapd v3, v2 583; PWR9BE-NEXT: vadduhm v2, v2, v3 584; PWR9BE-NEXT: xxspltw v3, v2, 1 585; PWR9BE-NEXT: vadduhm v2, v2, v3 586; PWR9BE-NEXT: vsplth v3, v2, 1 587; PWR9BE-NEXT: vadduhm v2, v2, v3 588; PWR9BE-NEXT: vextuhlx r3, r3, v2 589; PWR9BE-NEXT: extsh r3, r3 590; PWR9BE-NEXT: blr 591; 592; PWR10LE-LABEL: v16i8tov16i16_sign: 593; PWR10LE: # %bb.0: # %entry 594; PWR10LE-NEXT: vmrghb v3, v2, v2 595; PWR10LE-NEXT: xxspltiw v4, 524296 596; PWR10LE-NEXT: vmrglb v2, v2, v2 597; PWR10LE-NEXT: li r3, 0 598; PWR10LE-NEXT: vslh v3, v3, v4 599; PWR10LE-NEXT: vslh v2, v2, v4 600; PWR10LE-NEXT: vsrah v3, v3, v4 601; PWR10LE-NEXT: vsrah v2, v2, v4 602; PWR10LE-NEXT: vadduhm v2, v2, v3 603; PWR10LE-NEXT: xxswapd v3, v2 604; PWR10LE-NEXT: vadduhm v2, v2, v3 605; PWR10LE-NEXT: xxspltw v3, v2, 2 606; PWR10LE-NEXT: vadduhm v2, v2, v3 607; PWR10LE-NEXT: vsplth v3, v2, 6 608; PWR10LE-NEXT: vadduhm v2, v2, v3 609; PWR10LE-NEXT: vextuhrx r3, r3, v2 610; PWR10LE-NEXT: extsh r3, r3 611; PWR10LE-NEXT: blr 612; 613; PWR10BE-LABEL: v16i8tov16i16_sign: 614; PWR10BE: # %bb.0: # %entry 615; PWR10BE-NEXT: vmrglb v3, v2, v2 616; PWR10BE-NEXT: xxspltiw v4, 524296 617; PWR10BE-NEXT: vmrghb v2, v2, v2 618; PWR10BE-NEXT: li r3, 0 619; PWR10BE-NEXT: vslh v3, v3, v4 620; PWR10BE-NEXT: vslh v2, v2, v4 621; PWR10BE-NEXT: vsrah v3, v3, v4 622; PWR10BE-NEXT: vsrah v2, v2, v4 623; PWR10BE-NEXT: vadduhm v2, v2, v3 624; PWR10BE-NEXT: xxswapd v3, v2 625; PWR10BE-NEXT: vadduhm v2, v2, v3 626; PWR10BE-NEXT: xxspltw v3, v2, 1 627; PWR10BE-NEXT: vadduhm v2, v2, v3 628; PWR10BE-NEXT: vsplth v3, v2, 1 629; PWR10BE-NEXT: vadduhm v2, v2, v3 630; PWR10BE-NEXT: vextuhlx r3, r3, v2 631; PWR10BE-NEXT: extsh r3, r3 632; PWR10BE-NEXT: blr 633entry: 634 %0 = sext <16 x i8> %a to <16 x i16> 635 %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0) 636 ret i16 %1 637} 638 639define dso_local zeroext i16 @v16i8tov16i16_zero(<16 x i8> %a) local_unnamed_addr #0 { 640; PWR9LE-LABEL: v16i8tov16i16_zero: 641; PWR9LE: # %bb.0: # %entry 642; PWR9LE-NEXT: xxlxor v3, v3, v3 643; PWR9LE-NEXT: li r3, 0 644; PWR9LE-NEXT: vmrghb v4, v3, v2 645; PWR9LE-NEXT: vmrglb v2, v3, v2 646; PWR9LE-NEXT: vadduhm v2, v2, v4 647; PWR9LE-NEXT: xxswapd v3, v2 648; PWR9LE-NEXT: vadduhm v2, v2, v3 649; PWR9LE-NEXT: xxspltw v3, v2, 2 650; PWR9LE-NEXT: vadduhm v2, v2, v3 651; PWR9LE-NEXT: vsplth v3, v2, 6 652; PWR9LE-NEXT: vadduhm v2, v2, v3 653; PWR9LE-NEXT: vextuhrx r3, r3, v2 654; PWR9LE-NEXT: clrldi r3, r3, 48 655; PWR9LE-NEXT: blr 656; 657; PWR9BE-LABEL: v16i8tov16i16_zero: 658; PWR9BE: # %bb.0: # %entry 659; PWR9BE-NEXT: xxlxor v3, v3, v3 660; PWR9BE-NEXT: li r3, 0 661; PWR9BE-NEXT: vmrglb v4, v3, v2 662; PWR9BE-NEXT: vmrghb v2, v3, v2 663; PWR9BE-NEXT: vadduhm v2, v2, v4 664; PWR9BE-NEXT: xxswapd v3, v2 665; PWR9BE-NEXT: vadduhm v2, v2, v3 666; PWR9BE-NEXT: xxspltw v3, v2, 1 667; PWR9BE-NEXT: vadduhm v2, v2, v3 668; PWR9BE-NEXT: vsplth v3, v2, 1 669; PWR9BE-NEXT: vadduhm v2, v2, v3 670; PWR9BE-NEXT: vextuhlx r3, r3, v2 671; PWR9BE-NEXT: clrldi r3, r3, 48 672; PWR9BE-NEXT: blr 673; 674; PWR10LE-LABEL: v16i8tov16i16_zero: 675; PWR10LE: # %bb.0: # %entry 676; PWR10LE-NEXT: xxlxor v3, v3, v3 677; PWR10LE-NEXT: li r3, 0 678; PWR10LE-NEXT: vmrghb v4, v3, v2 679; PWR10LE-NEXT: vmrglb v2, v3, v2 680; PWR10LE-NEXT: vadduhm v2, v2, v4 681; PWR10LE-NEXT: xxswapd v3, v2 682; PWR10LE-NEXT: vadduhm v2, v2, v3 683; PWR10LE-NEXT: xxspltw v3, v2, 2 684; PWR10LE-NEXT: vadduhm v2, v2, v3 685; PWR10LE-NEXT: vsplth v3, v2, 6 686; PWR10LE-NEXT: vadduhm v2, v2, v3 687; PWR10LE-NEXT: vextuhrx r3, r3, v2 688; PWR10LE-NEXT: clrldi r3, r3, 48 689; PWR10LE-NEXT: blr 690; 691; PWR10BE-LABEL: v16i8tov16i16_zero: 692; PWR10BE: # %bb.0: # %entry 693; PWR10BE-NEXT: xxlxor v3, v3, v3 694; PWR10BE-NEXT: li r3, 0 695; PWR10BE-NEXT: vmrglb v4, v3, v2 696; PWR10BE-NEXT: vmrghb v2, v3, v2 697; PWR10BE-NEXT: vadduhm v2, v2, v4 698; PWR10BE-NEXT: xxswapd v3, v2 699; PWR10BE-NEXT: vadduhm v2, v2, v3 700; PWR10BE-NEXT: xxspltw v3, v2, 1 701; PWR10BE-NEXT: vadduhm v2, v2, v3 702; PWR10BE-NEXT: vsplth v3, v2, 1 703; PWR10BE-NEXT: vadduhm v2, v2, v3 704; PWR10BE-NEXT: vextuhlx r3, r3, v2 705; PWR10BE-NEXT: clrldi r3, r3, 48 706; PWR10BE-NEXT: blr 707entry: 708 %0 = zext <16 x i8> %a to <16 x i16> 709 %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0) 710 ret i16 %1 711} 712 713declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) #0 714declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0 715declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #0 716declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0 717 718;; 719;; Vectors of i32 720;; 721define dso_local zeroext i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 { 722; PWR9LE-LABEL: v2i32: 723; PWR9LE: # %bb.0: # %entry 724; PWR9LE-NEXT: xxspltw v3, v2, 2 725; PWR9LE-NEXT: li r3, 0 726; PWR9LE-NEXT: vadduwm v2, v2, v3 727; PWR9LE-NEXT: vextuwrx r3, r3, v2 728; PWR9LE-NEXT: blr 729; 730; PWR9BE-LABEL: v2i32: 731; PWR9BE: # %bb.0: # %entry 732; PWR9BE-NEXT: xxspltw v3, v2, 1 733; PWR9BE-NEXT: li r3, 0 734; PWR9BE-NEXT: vadduwm v2, v2, v3 735; PWR9BE-NEXT: vextuwlx r3, r3, v2 736; PWR9BE-NEXT: blr 737; 738; PWR10LE-LABEL: v2i32: 739; PWR10LE: # %bb.0: # %entry 740; PWR10LE-NEXT: xxspltw v3, v2, 2 741; PWR10LE-NEXT: li r3, 0 742; PWR10LE-NEXT: vadduwm v2, v2, v3 743; PWR10LE-NEXT: vextuwrx r3, r3, v2 744; PWR10LE-NEXT: blr 745; 746; PWR10BE-LABEL: v2i32: 747; PWR10BE: # %bb.0: # %entry 748; PWR10BE-NEXT: xxspltw v3, v2, 1 749; PWR10BE-NEXT: li r3, 0 750; PWR10BE-NEXT: vadduwm v2, v2, v3 751; PWR10BE-NEXT: vextuwlx r3, r3, v2 752; PWR10BE-NEXT: blr 753entry: 754 %0 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a) 755 ret i32 %0 756} 757 758define dso_local zeroext i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 { 759; PWR9LE-LABEL: v4i32: 760; PWR9LE: # %bb.0: # %entry 761; PWR9LE-NEXT: xxswapd v3, v2 762; PWR9LE-NEXT: li r3, 0 763; PWR9LE-NEXT: vadduwm v2, v2, v3 764; PWR9LE-NEXT: xxspltw v3, v2, 2 765; PWR9LE-NEXT: vadduwm v2, v2, v3 766; PWR9LE-NEXT: vextuwrx r3, r3, v2 767; PWR9LE-NEXT: blr 768; 769; PWR9BE-LABEL: v4i32: 770; PWR9BE: # %bb.0: # %entry 771; PWR9BE-NEXT: xxswapd v3, v2 772; PWR9BE-NEXT: li r3, 0 773; PWR9BE-NEXT: vadduwm v2, v2, v3 774; PWR9BE-NEXT: xxspltw v3, v2, 1 775; PWR9BE-NEXT: vadduwm v2, v2, v3 776; PWR9BE-NEXT: vextuwlx r3, r3, v2 777; PWR9BE-NEXT: blr 778; 779; PWR10LE-LABEL: v4i32: 780; PWR10LE: # %bb.0: # %entry 781; PWR10LE-NEXT: xxswapd v3, v2 782; PWR10LE-NEXT: li r3, 0 783; PWR10LE-NEXT: vadduwm v2, v2, v3 784; PWR10LE-NEXT: xxspltw v3, v2, 2 785; PWR10LE-NEXT: vadduwm v2, v2, v3 786; PWR10LE-NEXT: vextuwrx r3, r3, v2 787; PWR10LE-NEXT: blr 788; 789; PWR10BE-LABEL: v4i32: 790; PWR10BE: # %bb.0: # %entry 791; PWR10BE-NEXT: xxswapd v3, v2 792; PWR10BE-NEXT: li r3, 0 793; PWR10BE-NEXT: vadduwm v2, v2, v3 794; PWR10BE-NEXT: xxspltw v3, v2, 1 795; PWR10BE-NEXT: vadduwm v2, v2, v3 796; PWR10BE-NEXT: vextuwlx r3, r3, v2 797; PWR10BE-NEXT: blr 798entry: 799 %0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) 800 ret i32 %0 801} 802 803define dso_local zeroext i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 { 804; PWR9LE-LABEL: v8i32: 805; PWR9LE: # %bb.0: # %entry 806; PWR9LE-NEXT: vadduwm v2, v2, v3 807; PWR9LE-NEXT: li r3, 0 808; PWR9LE-NEXT: xxswapd v3, v2 809; PWR9LE-NEXT: vadduwm v2, v2, v3 810; PWR9LE-NEXT: xxspltw v3, v2, 2 811; PWR9LE-NEXT: vadduwm v2, v2, v3 812; PWR9LE-NEXT: vextuwrx r3, r3, v2 813; PWR9LE-NEXT: blr 814; 815; PWR9BE-LABEL: v8i32: 816; PWR9BE: # %bb.0: # %entry 817; PWR9BE-NEXT: vadduwm v2, v2, v3 818; PWR9BE-NEXT: li r3, 0 819; PWR9BE-NEXT: xxswapd v3, v2 820; PWR9BE-NEXT: vadduwm v2, v2, v3 821; PWR9BE-NEXT: xxspltw v3, v2, 1 822; PWR9BE-NEXT: vadduwm v2, v2, v3 823; PWR9BE-NEXT: vextuwlx r3, r3, v2 824; PWR9BE-NEXT: blr 825; 826; PWR10LE-LABEL: v8i32: 827; PWR10LE: # %bb.0: # %entry 828; PWR10LE-NEXT: vadduwm v2, v2, v3 829; PWR10LE-NEXT: li r3, 0 830; PWR10LE-NEXT: xxswapd v3, v2 831; PWR10LE-NEXT: vadduwm v2, v2, v3 832; PWR10LE-NEXT: xxspltw v3, v2, 2 833; PWR10LE-NEXT: vadduwm v2, v2, v3 834; PWR10LE-NEXT: vextuwrx r3, r3, v2 835; PWR10LE-NEXT: blr 836; 837; PWR10BE-LABEL: v8i32: 838; PWR10BE: # %bb.0: # %entry 839; PWR10BE-NEXT: vadduwm v2, v2, v3 840; PWR10BE-NEXT: li r3, 0 841; PWR10BE-NEXT: xxswapd v3, v2 842; PWR10BE-NEXT: vadduwm v2, v2, v3 843; PWR10BE-NEXT: xxspltw v3, v2, 1 844; PWR10BE-NEXT: vadduwm v2, v2, v3 845; PWR10BE-NEXT: vextuwlx r3, r3, v2 846; PWR10BE-NEXT: blr 847entry: 848 %0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a) 849 ret i32 %0 850} 851 852define dso_local zeroext i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 { 853; PWR9LE-LABEL: v16i32: 854; PWR9LE: # %bb.0: # %entry 855; PWR9LE-NEXT: vadduwm v3, v3, v5 856; PWR9LE-NEXT: vadduwm v2, v2, v4 857; PWR9LE-NEXT: li r3, 0 858; PWR9LE-NEXT: vadduwm v2, v2, v3 859; PWR9LE-NEXT: xxswapd v3, v2 860; PWR9LE-NEXT: vadduwm v2, v2, v3 861; PWR9LE-NEXT: xxspltw v3, v2, 2 862; PWR9LE-NEXT: vadduwm v2, v2, v3 863; PWR9LE-NEXT: vextuwrx r3, r3, v2 864; PWR9LE-NEXT: blr 865; 866; PWR9BE-LABEL: v16i32: 867; PWR9BE: # %bb.0: # %entry 868; PWR9BE-NEXT: vadduwm v3, v3, v5 869; PWR9BE-NEXT: vadduwm v2, v2, v4 870; PWR9BE-NEXT: li r3, 0 871; PWR9BE-NEXT: vadduwm v2, v2, v3 872; PWR9BE-NEXT: xxswapd v3, v2 873; PWR9BE-NEXT: vadduwm v2, v2, v3 874; PWR9BE-NEXT: xxspltw v3, v2, 1 875; PWR9BE-NEXT: vadduwm v2, v2, v3 876; PWR9BE-NEXT: vextuwlx r3, r3, v2 877; PWR9BE-NEXT: blr 878; 879; PWR10LE-LABEL: v16i32: 880; PWR10LE: # %bb.0: # %entry 881; PWR10LE-NEXT: vadduwm v3, v3, v5 882; PWR10LE-NEXT: vadduwm v2, v2, v4 883; PWR10LE-NEXT: li r3, 0 884; PWR10LE-NEXT: vadduwm v2, v2, v3 885; PWR10LE-NEXT: xxswapd v3, v2 886; PWR10LE-NEXT: vadduwm v2, v2, v3 887; PWR10LE-NEXT: xxspltw v3, v2, 2 888; PWR10LE-NEXT: vadduwm v2, v2, v3 889; PWR10LE-NEXT: vextuwrx r3, r3, v2 890; PWR10LE-NEXT: blr 891; 892; PWR10BE-LABEL: v16i32: 893; PWR10BE: # %bb.0: # %entry 894; PWR10BE-NEXT: vadduwm v3, v3, v5 895; PWR10BE-NEXT: vadduwm v2, v2, v4 896; PWR10BE-NEXT: li r3, 0 897; PWR10BE-NEXT: vadduwm v2, v2, v3 898; PWR10BE-NEXT: xxswapd v3, v2 899; PWR10BE-NEXT: vadduwm v2, v2, v3 900; PWR10BE-NEXT: xxspltw v3, v2, 1 901; PWR10BE-NEXT: vadduwm v2, v2, v3 902; PWR10BE-NEXT: vextuwlx r3, r3, v2 903; PWR10BE-NEXT: blr 904entry: 905 %0 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a) 906 ret i32 %0 907} 908 909define dso_local zeroext i32 @v32i32(<32 x i32> %a) local_unnamed_addr #0 { 910; PWR9LE-LABEL: v32i32: 911; PWR9LE: # %bb.0: # %entry 912; PWR9LE-NEXT: vadduwm v4, v4, v8 913; PWR9LE-NEXT: vadduwm v2, v2, v6 914; PWR9LE-NEXT: li r3, 0 915; PWR9LE-NEXT: vadduwm v5, v5, v9 916; PWR9LE-NEXT: vadduwm v3, v3, v7 917; PWR9LE-NEXT: vadduwm v3, v3, v5 918; PWR9LE-NEXT: vadduwm v2, v2, v4 919; PWR9LE-NEXT: vadduwm v2, v2, v3 920; PWR9LE-NEXT: xxswapd v3, v2 921; PWR9LE-NEXT: vadduwm v2, v2, v3 922; PWR9LE-NEXT: xxspltw v3, v2, 2 923; PWR9LE-NEXT: vadduwm v2, v2, v3 924; PWR9LE-NEXT: vextuwrx r3, r3, v2 925; PWR9LE-NEXT: blr 926; 927; PWR9BE-LABEL: v32i32: 928; PWR9BE: # %bb.0: # %entry 929; PWR9BE-NEXT: vadduwm v4, v4, v8 930; PWR9BE-NEXT: vadduwm v2, v2, v6 931; PWR9BE-NEXT: li r3, 0 932; PWR9BE-NEXT: vadduwm v5, v5, v9 933; PWR9BE-NEXT: vadduwm v3, v3, v7 934; PWR9BE-NEXT: vadduwm v3, v3, v5 935; PWR9BE-NEXT: vadduwm v2, v2, v4 936; PWR9BE-NEXT: vadduwm v2, v2, v3 937; PWR9BE-NEXT: xxswapd v3, v2 938; PWR9BE-NEXT: vadduwm v2, v2, v3 939; PWR9BE-NEXT: xxspltw v3, v2, 1 940; PWR9BE-NEXT: vadduwm v2, v2, v3 941; PWR9BE-NEXT: vextuwlx r3, r3, v2 942; PWR9BE-NEXT: blr 943; 944; PWR10LE-LABEL: v32i32: 945; PWR10LE: # %bb.0: # %entry 946; PWR10LE-NEXT: vadduwm v4, v4, v8 947; PWR10LE-NEXT: vadduwm v2, v2, v6 948; PWR10LE-NEXT: vadduwm v5, v5, v9 949; PWR10LE-NEXT: vadduwm v3, v3, v7 950; PWR10LE-NEXT: li r3, 0 951; PWR10LE-NEXT: vadduwm v3, v3, v5 952; PWR10LE-NEXT: vadduwm v2, v2, v4 953; PWR10LE-NEXT: vadduwm v2, v2, v3 954; PWR10LE-NEXT: xxswapd v3, v2 955; PWR10LE-NEXT: vadduwm v2, v2, v3 956; PWR10LE-NEXT: xxspltw v3, v2, 2 957; PWR10LE-NEXT: vadduwm v2, v2, v3 958; PWR10LE-NEXT: vextuwrx r3, r3, v2 959; PWR10LE-NEXT: blr 960; 961; PWR10BE-LABEL: v32i32: 962; PWR10BE: # %bb.0: # %entry 963; PWR10BE-NEXT: vadduwm v4, v4, v8 964; PWR10BE-NEXT: vadduwm v2, v2, v6 965; PWR10BE-NEXT: vadduwm v5, v5, v9 966; PWR10BE-NEXT: vadduwm v3, v3, v7 967; PWR10BE-NEXT: li r3, 0 968; PWR10BE-NEXT: vadduwm v3, v3, v5 969; PWR10BE-NEXT: vadduwm v2, v2, v4 970; PWR10BE-NEXT: vadduwm v2, v2, v3 971; PWR10BE-NEXT: xxswapd v3, v2 972; PWR10BE-NEXT: vadduwm v2, v2, v3 973; PWR10BE-NEXT: xxspltw v3, v2, 1 974; PWR10BE-NEXT: vadduwm v2, v2, v3 975; PWR10BE-NEXT: vextuwlx r3, r3, v2 976; PWR10BE-NEXT: blr 977entry: 978 %0 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a) 979 ret i32 %0 980} 981 982define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_addr #0 { 983; PWR9LE-LABEL: v16i8tov16i32_sign: 984; PWR9LE: # %bb.0: # %entry 985; PWR9LE-NEXT: addis r3, r2, .LCPI17_0@toc@ha 986; PWR9LE-NEXT: addi r3, r3, .LCPI17_0@toc@l 987; PWR9LE-NEXT: lxv v3, 0(r3) 988; PWR9LE-NEXT: addis r3, r2, .LCPI17_1@toc@ha 989; PWR9LE-NEXT: addi r3, r3, .LCPI17_1@toc@l 990; PWR9LE-NEXT: lxv v4, 0(r3) 991; PWR9LE-NEXT: addis r3, r2, .LCPI17_2@toc@ha 992; PWR9LE-NEXT: vperm v3, v2, v2, v3 993; PWR9LE-NEXT: addi r3, r3, .LCPI17_2@toc@l 994; PWR9LE-NEXT: lxv v5, 0(r3) 995; PWR9LE-NEXT: addis r3, r2, .LCPI17_3@toc@ha 996; PWR9LE-NEXT: vextsb2w v3, v3 997; PWR9LE-NEXT: vperm v4, v2, v2, v4 998; PWR9LE-NEXT: addi r3, r3, .LCPI17_3@toc@l 999; PWR9LE-NEXT: lxv v0, 0(r3) 1000; PWR9LE-NEXT: vextsb2w v4, v4 1001; PWR9LE-NEXT: li r3, 0 1002; PWR9LE-NEXT: vperm v5, v2, v2, v5 1003; PWR9LE-NEXT: vadduwm v3, v4, v3 1004; PWR9LE-NEXT: vextsb2w v5, v5 1005; PWR9LE-NEXT: vperm v2, v2, v2, v0 1006; PWR9LE-NEXT: vextsb2w v2, v2 1007; PWR9LE-NEXT: vadduwm v2, v2, v5 1008; PWR9LE-NEXT: vadduwm v2, v3, v2 1009; PWR9LE-NEXT: xxswapd v3, v2 1010; PWR9LE-NEXT: vadduwm v2, v2, v3 1011; PWR9LE-NEXT: xxspltw v3, v2, 2 1012; PWR9LE-NEXT: vadduwm v2, v2, v3 1013; PWR9LE-NEXT: vextuwrx r3, r3, v2 1014; PWR9LE-NEXT: extsw r3, r3 1015; PWR9LE-NEXT: blr 1016; 1017; PWR9BE-LABEL: v16i8tov16i32_sign: 1018; PWR9BE: # %bb.0: # %entry 1019; PWR9BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha 1020; PWR9BE-NEXT: addi r3, r3, .LCPI17_0@toc@l 1021; PWR9BE-NEXT: lxv v3, 0(r3) 1022; PWR9BE-NEXT: addis r3, r2, .LCPI17_1@toc@ha 1023; PWR9BE-NEXT: addi r3, r3, .LCPI17_1@toc@l 1024; PWR9BE-NEXT: lxv v4, 0(r3) 1025; PWR9BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha 1026; PWR9BE-NEXT: vperm v3, v2, v2, v3 1027; PWR9BE-NEXT: addi r3, r3, .LCPI17_2@toc@l 1028; PWR9BE-NEXT: lxv v5, 0(r3) 1029; PWR9BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha 1030; PWR9BE-NEXT: vextsb2w v3, v3 1031; PWR9BE-NEXT: vperm v4, v2, v2, v4 1032; PWR9BE-NEXT: addi r3, r3, .LCPI17_3@toc@l 1033; PWR9BE-NEXT: lxv v0, 0(r3) 1034; PWR9BE-NEXT: vextsb2w v4, v4 1035; PWR9BE-NEXT: li r3, 0 1036; PWR9BE-NEXT: vperm v5, v2, v2, v5 1037; PWR9BE-NEXT: vadduwm v3, v4, v3 1038; PWR9BE-NEXT: vextsb2w v5, v5 1039; PWR9BE-NEXT: vperm v2, v2, v2, v0 1040; PWR9BE-NEXT: vextsb2w v2, v2 1041; PWR9BE-NEXT: vadduwm v2, v2, v5 1042; PWR9BE-NEXT: vadduwm v2, v3, v2 1043; PWR9BE-NEXT: xxswapd v3, v2 1044; PWR9BE-NEXT: vadduwm v2, v2, v3 1045; PWR9BE-NEXT: xxspltw v3, v2, 1 1046; PWR9BE-NEXT: vadduwm v2, v2, v3 1047; PWR9BE-NEXT: vextuwlx r3, r3, v2 1048; PWR9BE-NEXT: extsw r3, r3 1049; PWR9BE-NEXT: blr 1050; 1051; PWR10LE-LABEL: v16i8tov16i32_sign: 1052; PWR10LE: # %bb.0: # %entry 1053; PWR10LE-NEXT: plxv v3, .LCPI17_0@PCREL(0), 1 1054; PWR10LE-NEXT: plxv v4, .LCPI17_1@PCREL(0), 1 1055; PWR10LE-NEXT: li r3, 0 1056; PWR10LE-NEXT: vperm v3, v2, v2, v3 1057; PWR10LE-NEXT: plxv v5, .LCPI17_2@PCREL(0), 1 1058; PWR10LE-NEXT: plxv v0, .LCPI17_3@PCREL(0), 1 1059; PWR10LE-NEXT: vperm v4, v2, v2, v4 1060; PWR10LE-NEXT: vperm v5, v2, v2, v5 1061; PWR10LE-NEXT: vperm v2, v2, v2, v0 1062; PWR10LE-NEXT: vextsb2w v3, v3 1063; PWR10LE-NEXT: vextsb2w v4, v4 1064; PWR10LE-NEXT: vextsb2w v5, v5 1065; PWR10LE-NEXT: vextsb2w v2, v2 1066; PWR10LE-NEXT: vadduwm v2, v2, v5 1067; PWR10LE-NEXT: vadduwm v3, v4, v3 1068; PWR10LE-NEXT: vadduwm v2, v3, v2 1069; PWR10LE-NEXT: xxswapd v3, v2 1070; PWR10LE-NEXT: vadduwm v2, v2, v3 1071; PWR10LE-NEXT: xxspltw v3, v2, 2 1072; PWR10LE-NEXT: vadduwm v2, v2, v3 1073; PWR10LE-NEXT: vextuwrx r3, r3, v2 1074; PWR10LE-NEXT: extsw r3, r3 1075; PWR10LE-NEXT: blr 1076; 1077; PWR10BE-LABEL: v16i8tov16i32_sign: 1078; PWR10BE: # %bb.0: # %entry 1079; PWR10BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha 1080; PWR10BE-NEXT: addi r3, r3, .LCPI17_0@toc@l 1081; PWR10BE-NEXT: lxv v3, 0(r3) 1082; PWR10BE-NEXT: addis r3, r2, .LCPI17_1@toc@ha 1083; PWR10BE-NEXT: addi r3, r3, .LCPI17_1@toc@l 1084; PWR10BE-NEXT: lxv v4, 0(r3) 1085; PWR10BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha 1086; PWR10BE-NEXT: vperm v3, v2, v2, v3 1087; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l 1088; PWR10BE-NEXT: vextsb2w v3, v3 1089; PWR10BE-NEXT: lxv v5, 0(r3) 1090; PWR10BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha 1091; PWR10BE-NEXT: vperm v4, v2, v2, v4 1092; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l 1093; PWR10BE-NEXT: vextsb2w v4, v4 1094; PWR10BE-NEXT: lxv v0, 0(r3) 1095; PWR10BE-NEXT: li r3, 0 1096; PWR10BE-NEXT: vperm v5, v2, v2, v5 1097; PWR10BE-NEXT: vadduwm v3, v4, v3 1098; PWR10BE-NEXT: vextsb2w v5, v5 1099; PWR10BE-NEXT: vperm v2, v2, v2, v0 1100; PWR10BE-NEXT: vextsb2w v2, v2 1101; PWR10BE-NEXT: vadduwm v2, v2, v5 1102; PWR10BE-NEXT: vadduwm v2, v3, v2 1103; PWR10BE-NEXT: xxswapd v3, v2 1104; PWR10BE-NEXT: vadduwm v2, v2, v3 1105; PWR10BE-NEXT: xxspltw v3, v2, 1 1106; PWR10BE-NEXT: vadduwm v2, v2, v3 1107; PWR10BE-NEXT: vextuwlx r3, r3, v2 1108; PWR10BE-NEXT: extsw r3, r3 1109; PWR10BE-NEXT: blr 1110entry: 1111 %0 = sext <16 x i8> %a to <16 x i32> 1112 %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0) 1113 ret i32 %1 1114} 1115 1116define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_addr #0 { 1117; PWR9LE-LABEL: v16i8tov16i32_zero: 1118; PWR9LE: # %bb.0: # %entry 1119; PWR9LE-NEXT: addis r3, r2, .LCPI18_0@toc@ha 1120; PWR9LE-NEXT: xxlxor v4, v4, v4 1121; PWR9LE-NEXT: addi r3, r3, .LCPI18_0@toc@l 1122; PWR9LE-NEXT: lxv v3, 0(r3) 1123; PWR9LE-NEXT: addis r3, r2, .LCPI18_1@toc@ha 1124; PWR9LE-NEXT: addi r3, r3, .LCPI18_1@toc@l 1125; PWR9LE-NEXT: lxv v5, 0(r3) 1126; PWR9LE-NEXT: addis r3, r2, .LCPI18_2@toc@ha 1127; PWR9LE-NEXT: vperm v3, v4, v2, v3 1128; PWR9LE-NEXT: addi r3, r3, .LCPI18_2@toc@l 1129; PWR9LE-NEXT: lxv v0, 0(r3) 1130; PWR9LE-NEXT: addis r3, r2, .LCPI18_3@toc@ha 1131; PWR9LE-NEXT: vperm v5, v4, v2, v5 1132; PWR9LE-NEXT: addi r3, r3, .LCPI18_3@toc@l 1133; PWR9LE-NEXT: lxv v1, 0(r3) 1134; PWR9LE-NEXT: vadduwm v3, v5, v3 1135; PWR9LE-NEXT: li r3, 0 1136; PWR9LE-NEXT: vperm v0, v4, v2, v0 1137; PWR9LE-NEXT: vperm v2, v4, v2, v1 1138; PWR9LE-NEXT: vadduwm v2, v2, v0 1139; PWR9LE-NEXT: vadduwm v2, v3, v2 1140; PWR9LE-NEXT: xxswapd v3, v2 1141; PWR9LE-NEXT: vadduwm v2, v2, v3 1142; PWR9LE-NEXT: xxspltw v3, v2, 2 1143; PWR9LE-NEXT: vadduwm v2, v2, v3 1144; PWR9LE-NEXT: vextuwrx r3, r3, v2 1145; PWR9LE-NEXT: blr 1146; 1147; PWR9BE-LABEL: v16i8tov16i32_zero: 1148; PWR9BE: # %bb.0: # %entry 1149; PWR9BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha 1150; PWR9BE-NEXT: xxlxor v4, v4, v4 1151; PWR9BE-NEXT: addi r3, r3, .LCPI18_0@toc@l 1152; PWR9BE-NEXT: lxv v3, 0(r3) 1153; PWR9BE-NEXT: addis r3, r2, .LCPI18_1@toc@ha 1154; PWR9BE-NEXT: addi r3, r3, .LCPI18_1@toc@l 1155; PWR9BE-NEXT: lxv v5, 0(r3) 1156; PWR9BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha 1157; PWR9BE-NEXT: vperm v3, v4, v2, v3 1158; PWR9BE-NEXT: addi r3, r3, .LCPI18_2@toc@l 1159; PWR9BE-NEXT: lxv v0, 0(r3) 1160; PWR9BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha 1161; PWR9BE-NEXT: vperm v5, v4, v2, v5 1162; PWR9BE-NEXT: addi r3, r3, .LCPI18_3@toc@l 1163; PWR9BE-NEXT: lxv v1, 0(r3) 1164; PWR9BE-NEXT: vadduwm v3, v5, v3 1165; PWR9BE-NEXT: li r3, 0 1166; PWR9BE-NEXT: vperm v0, v4, v2, v0 1167; PWR9BE-NEXT: vperm v2, v4, v2, v1 1168; PWR9BE-NEXT: vadduwm v2, v2, v0 1169; PWR9BE-NEXT: vadduwm v2, v3, v2 1170; PWR9BE-NEXT: xxswapd v3, v2 1171; PWR9BE-NEXT: vadduwm v2, v2, v3 1172; PWR9BE-NEXT: xxspltw v3, v2, 1 1173; PWR9BE-NEXT: vadduwm v2, v2, v3 1174; PWR9BE-NEXT: vextuwlx r3, r3, v2 1175; PWR9BE-NEXT: blr 1176; 1177; PWR10LE-LABEL: v16i8tov16i32_zero: 1178; PWR10LE: # %bb.0: # %entry 1179; PWR10LE-NEXT: plxv v3, .LCPI18_0@PCREL(0), 1 1180; PWR10LE-NEXT: plxv v5, .LCPI18_1@PCREL(0), 1 1181; PWR10LE-NEXT: xxlxor v4, v4, v4 1182; PWR10LE-NEXT: li r3, 0 1183; PWR10LE-NEXT: vperm v3, v4, v2, v3 1184; PWR10LE-NEXT: plxv v0, .LCPI18_2@PCREL(0), 1 1185; PWR10LE-NEXT: plxv v1, .LCPI18_3@PCREL(0), 1 1186; PWR10LE-NEXT: vperm v5, v4, v2, v5 1187; PWR10LE-NEXT: vperm v0, v4, v2, v0 1188; PWR10LE-NEXT: vperm v2, v4, v2, v1 1189; PWR10LE-NEXT: vadduwm v2, v2, v0 1190; PWR10LE-NEXT: vadduwm v3, v5, v3 1191; PWR10LE-NEXT: vadduwm v2, v3, v2 1192; PWR10LE-NEXT: xxswapd v3, v2 1193; PWR10LE-NEXT: vadduwm v2, v2, v3 1194; PWR10LE-NEXT: xxspltw v3, v2, 2 1195; PWR10LE-NEXT: vadduwm v2, v2, v3 1196; PWR10LE-NEXT: vextuwrx r3, r3, v2 1197; PWR10LE-NEXT: blr 1198; 1199; PWR10BE-LABEL: v16i8tov16i32_zero: 1200; PWR10BE: # %bb.0: # %entry 1201; PWR10BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha 1202; PWR10BE-NEXT: xxlxor v4, v4, v4 1203; PWR10BE-NEXT: addi r3, r3, .LCPI18_0@toc@l 1204; PWR10BE-NEXT: lxv v3, 0(r3) 1205; PWR10BE-NEXT: addis r3, r2, .LCPI18_1@toc@ha 1206; PWR10BE-NEXT: addi r3, r3, .LCPI18_1@toc@l 1207; PWR10BE-NEXT: lxv v5, 0(r3) 1208; PWR10BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha 1209; PWR10BE-NEXT: vperm v3, v4, v2, v3 1210; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l 1211; PWR10BE-NEXT: lxv v0, 0(r3) 1212; PWR10BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha 1213; PWR10BE-NEXT: vperm v5, v4, v2, v5 1214; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l 1215; PWR10BE-NEXT: vadduwm v3, v5, v3 1216; PWR10BE-NEXT: lxv v1, 0(r3) 1217; PWR10BE-NEXT: li r3, 0 1218; PWR10BE-NEXT: vperm v0, v4, v2, v0 1219; PWR10BE-NEXT: vperm v2, v4, v2, v1 1220; PWR10BE-NEXT: vadduwm v2, v2, v0 1221; PWR10BE-NEXT: vadduwm v2, v3, v2 1222; PWR10BE-NEXT: xxswapd v3, v2 1223; PWR10BE-NEXT: vadduwm v2, v2, v3 1224; PWR10BE-NEXT: xxspltw v3, v2, 1 1225; PWR10BE-NEXT: vadduwm v2, v2, v3 1226; PWR10BE-NEXT: vextuwlx r3, r3, v2 1227; PWR10BE-NEXT: blr 1228entry: 1229 %0 = zext <16 x i8> %a to <16 x i32> 1230 %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0) 1231 ret i32 %1 1232} 1233 1234declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) #0 1235declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #0 1236declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #0 1237declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #0 1238declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) #0 1239 1240;; 1241;; Vectors of i64 1242;; 1243define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 { 1244; PWR9LE-LABEL: v2i64: 1245; PWR9LE: # %bb.0: # %entry 1246; PWR9LE-NEXT: xxswapd v3, v2 1247; PWR9LE-NEXT: vaddudm v2, v2, v3 1248; PWR9LE-NEXT: mfvsrld r3, v2 1249; PWR9LE-NEXT: blr 1250; 1251; PWR9BE-LABEL: v2i64: 1252; PWR9BE: # %bb.0: # %entry 1253; PWR9BE-NEXT: xxswapd v3, v2 1254; PWR9BE-NEXT: vaddudm v2, v2, v3 1255; PWR9BE-NEXT: mfvsrd r3, v2 1256; PWR9BE-NEXT: blr 1257; 1258; PWR10LE-LABEL: v2i64: 1259; PWR10LE: # %bb.0: # %entry 1260; PWR10LE-NEXT: xxswapd v3, v2 1261; PWR10LE-NEXT: vaddudm v2, v2, v3 1262; PWR10LE-NEXT: mfvsrld r3, v2 1263; PWR10LE-NEXT: blr 1264; 1265; PWR10BE-LABEL: v2i64: 1266; PWR10BE: # %bb.0: # %entry 1267; PWR10BE-NEXT: xxswapd v3, v2 1268; PWR10BE-NEXT: vaddudm v2, v2, v3 1269; PWR10BE-NEXT: mfvsrd r3, v2 1270; PWR10BE-NEXT: blr 1271entry: 1272 %0 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a) 1273 ret i64 %0 1274} 1275 1276define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 { 1277; PWR9LE-LABEL: v4i64: 1278; PWR9LE: # %bb.0: # %entry 1279; PWR9LE-NEXT: vaddudm v2, v2, v3 1280; PWR9LE-NEXT: xxswapd v3, v2 1281; PWR9LE-NEXT: vaddudm v2, v2, v3 1282; PWR9LE-NEXT: mfvsrld r3, v2 1283; PWR9LE-NEXT: blr 1284; 1285; PWR9BE-LABEL: v4i64: 1286; PWR9BE: # %bb.0: # %entry 1287; PWR9BE-NEXT: vaddudm v2, v2, v3 1288; PWR9BE-NEXT: xxswapd v3, v2 1289; PWR9BE-NEXT: vaddudm v2, v2, v3 1290; PWR9BE-NEXT: mfvsrd r3, v2 1291; PWR9BE-NEXT: blr 1292; 1293; PWR10LE-LABEL: v4i64: 1294; PWR10LE: # %bb.0: # %entry 1295; PWR10LE-NEXT: vaddudm v2, v2, v3 1296; PWR10LE-NEXT: xxswapd v3, v2 1297; PWR10LE-NEXT: vaddudm v2, v2, v3 1298; PWR10LE-NEXT: mfvsrld r3, v2 1299; PWR10LE-NEXT: blr 1300; 1301; PWR10BE-LABEL: v4i64: 1302; PWR10BE: # %bb.0: # %entry 1303; PWR10BE-NEXT: vaddudm v2, v2, v3 1304; PWR10BE-NEXT: xxswapd v3, v2 1305; PWR10BE-NEXT: vaddudm v2, v2, v3 1306; PWR10BE-NEXT: mfvsrd r3, v2 1307; PWR10BE-NEXT: blr 1308entry: 1309 %0 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a) 1310 ret i64 %0 1311} 1312 1313define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 { 1314; PWR9LE-LABEL: v8i64: 1315; PWR9LE: # %bb.0: # %entry 1316; PWR9LE-NEXT: vaddudm v3, v3, v5 1317; PWR9LE-NEXT: vaddudm v2, v2, v4 1318; PWR9LE-NEXT: vaddudm v2, v2, v3 1319; PWR9LE-NEXT: xxswapd v3, v2 1320; PWR9LE-NEXT: vaddudm v2, v2, v3 1321; PWR9LE-NEXT: mfvsrld r3, v2 1322; PWR9LE-NEXT: blr 1323; 1324; PWR9BE-LABEL: v8i64: 1325; PWR9BE: # %bb.0: # %entry 1326; PWR9BE-NEXT: vaddudm v3, v3, v5 1327; PWR9BE-NEXT: vaddudm v2, v2, v4 1328; PWR9BE-NEXT: vaddudm v2, v2, v3 1329; PWR9BE-NEXT: xxswapd v3, v2 1330; PWR9BE-NEXT: vaddudm v2, v2, v3 1331; PWR9BE-NEXT: mfvsrd r3, v2 1332; PWR9BE-NEXT: blr 1333; 1334; PWR10LE-LABEL: v8i64: 1335; PWR10LE: # %bb.0: # %entry 1336; PWR10LE-NEXT: vaddudm v3, v3, v5 1337; PWR10LE-NEXT: vaddudm v2, v2, v4 1338; PWR10LE-NEXT: vaddudm v2, v2, v3 1339; PWR10LE-NEXT: xxswapd v3, v2 1340; PWR10LE-NEXT: vaddudm v2, v2, v3 1341; PWR10LE-NEXT: mfvsrld r3, v2 1342; PWR10LE-NEXT: blr 1343; 1344; PWR10BE-LABEL: v8i64: 1345; PWR10BE: # %bb.0: # %entry 1346; PWR10BE-NEXT: vaddudm v3, v3, v5 1347; PWR10BE-NEXT: vaddudm v2, v2, v4 1348; PWR10BE-NEXT: vaddudm v2, v2, v3 1349; PWR10BE-NEXT: xxswapd v3, v2 1350; PWR10BE-NEXT: vaddudm v2, v2, v3 1351; PWR10BE-NEXT: mfvsrd r3, v2 1352; PWR10BE-NEXT: blr 1353entry: 1354 %0 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a) 1355 ret i64 %0 1356} 1357 1358define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 { 1359; PWR9LE-LABEL: v16i64: 1360; PWR9LE: # %bb.0: # %entry 1361; PWR9LE-NEXT: vaddudm v4, v4, v8 1362; PWR9LE-NEXT: vaddudm v2, v2, v6 1363; PWR9LE-NEXT: vaddudm v5, v5, v9 1364; PWR9LE-NEXT: vaddudm v3, v3, v7 1365; PWR9LE-NEXT: vaddudm v3, v3, v5 1366; PWR9LE-NEXT: vaddudm v2, v2, v4 1367; PWR9LE-NEXT: vaddudm v2, v2, v3 1368; PWR9LE-NEXT: xxswapd v3, v2 1369; PWR9LE-NEXT: vaddudm v2, v2, v3 1370; PWR9LE-NEXT: mfvsrld r3, v2 1371; PWR9LE-NEXT: blr 1372; 1373; PWR9BE-LABEL: v16i64: 1374; PWR9BE: # %bb.0: # %entry 1375; PWR9BE-NEXT: vaddudm v4, v4, v8 1376; PWR9BE-NEXT: vaddudm v2, v2, v6 1377; PWR9BE-NEXT: vaddudm v5, v5, v9 1378; PWR9BE-NEXT: vaddudm v3, v3, v7 1379; PWR9BE-NEXT: vaddudm v3, v3, v5 1380; PWR9BE-NEXT: vaddudm v2, v2, v4 1381; PWR9BE-NEXT: vaddudm v2, v2, v3 1382; PWR9BE-NEXT: xxswapd v3, v2 1383; PWR9BE-NEXT: vaddudm v2, v2, v3 1384; PWR9BE-NEXT: mfvsrd r3, v2 1385; PWR9BE-NEXT: blr 1386; 1387; PWR10LE-LABEL: v16i64: 1388; PWR10LE: # %bb.0: # %entry 1389; PWR10LE-NEXT: vaddudm v4, v4, v8 1390; PWR10LE-NEXT: vaddudm v5, v5, v9 1391; PWR10LE-NEXT: vaddudm v3, v3, v7 1392; PWR10LE-NEXT: vaddudm v3, v3, v5 1393; PWR10LE-NEXT: vaddudm v2, v2, v6 1394; PWR10LE-NEXT: vaddudm v2, v2, v4 1395; PWR10LE-NEXT: vaddudm v2, v2, v3 1396; PWR10LE-NEXT: xxswapd v3, v2 1397; PWR10LE-NEXT: vaddudm v2, v2, v3 1398; PWR10LE-NEXT: mfvsrld r3, v2 1399; PWR10LE-NEXT: blr 1400; 1401; PWR10BE-LABEL: v16i64: 1402; PWR10BE: # %bb.0: # %entry 1403; PWR10BE-NEXT: vaddudm v4, v4, v8 1404; PWR10BE-NEXT: vaddudm v5, v5, v9 1405; PWR10BE-NEXT: vaddudm v3, v3, v7 1406; PWR10BE-NEXT: vaddudm v3, v3, v5 1407; PWR10BE-NEXT: vaddudm v2, v2, v6 1408; PWR10BE-NEXT: vaddudm v2, v2, v4 1409; PWR10BE-NEXT: vaddudm v2, v2, v3 1410; PWR10BE-NEXT: xxswapd v3, v2 1411; PWR10BE-NEXT: vaddudm v2, v2, v3 1412; PWR10BE-NEXT: mfvsrd r3, v2 1413; PWR10BE-NEXT: blr 1414entry: 1415 %0 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a) 1416 ret i64 %0 1417} 1418 1419define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { 1420; PWR9LE-LABEL: v16i8tov16i64_sign: 1421; PWR9LE: # %bb.0: # %entry 1422; PWR9LE-NEXT: addis r3, r2, .LCPI23_0@toc@ha 1423; PWR9LE-NEXT: addi r3, r3, .LCPI23_0@toc@l 1424; PWR9LE-NEXT: lxv v3, 0(r3) 1425; PWR9LE-NEXT: addis r3, r2, .LCPI23_1@toc@ha 1426; PWR9LE-NEXT: addi r3, r3, .LCPI23_1@toc@l 1427; PWR9LE-NEXT: lxv v4, 0(r3) 1428; PWR9LE-NEXT: addis r3, r2, .LCPI23_2@toc@ha 1429; PWR9LE-NEXT: vperm v3, v2, v2, v3 1430; PWR9LE-NEXT: addi r3, r3, .LCPI23_2@toc@l 1431; PWR9LE-NEXT: lxv v5, 0(r3) 1432; PWR9LE-NEXT: addis r3, r2, .LCPI23_3@toc@ha 1433; PWR9LE-NEXT: vextsb2d v3, v3 1434; PWR9LE-NEXT: vperm v4, v2, v2, v4 1435; PWR9LE-NEXT: addi r3, r3, .LCPI23_3@toc@l 1436; PWR9LE-NEXT: lxv v0, 0(r3) 1437; PWR9LE-NEXT: addis r3, r2, .LCPI23_4@toc@ha 1438; PWR9LE-NEXT: vextsb2d v4, v4 1439; PWR9LE-NEXT: vperm v5, v2, v2, v5 1440; PWR9LE-NEXT: addi r3, r3, .LCPI23_4@toc@l 1441; PWR9LE-NEXT: vaddudm v3, v4, v3 1442; PWR9LE-NEXT: lxv v1, 0(r3) 1443; PWR9LE-NEXT: addis r3, r2, .LCPI23_5@toc@ha 1444; PWR9LE-NEXT: vextsb2d v5, v5 1445; PWR9LE-NEXT: vperm v0, v2, v2, v0 1446; PWR9LE-NEXT: addi r3, r3, .LCPI23_5@toc@l 1447; PWR9LE-NEXT: lxv v6, 0(r3) 1448; PWR9LE-NEXT: addis r3, r2, .LCPI23_6@toc@ha 1449; PWR9LE-NEXT: vperm v1, v2, v2, v1 1450; PWR9LE-NEXT: vextsb2d v0, v0 1451; PWR9LE-NEXT: addi r3, r3, .LCPI23_6@toc@l 1452; PWR9LE-NEXT: vaddudm v5, v0, v5 1453; PWR9LE-NEXT: lxv v7, 0(r3) 1454; PWR9LE-NEXT: addis r3, r2, .LCPI23_7@toc@ha 1455; PWR9LE-NEXT: vperm v6, v2, v2, v6 1456; PWR9LE-NEXT: vextsb2d v1, v1 1457; PWR9LE-NEXT: vaddudm v3, v3, v5 1458; PWR9LE-NEXT: addi r3, r3, .LCPI23_7@toc@l 1459; PWR9LE-NEXT: lxv v8, 0(r3) 1460; PWR9LE-NEXT: vextsb2d v6, v6 1461; PWR9LE-NEXT: vperm v7, v2, v2, v7 1462; PWR9LE-NEXT: vaddudm v1, v6, v1 1463; PWR9LE-NEXT: vextsb2d v7, v7 1464; PWR9LE-NEXT: vperm v2, v2, v2, v8 1465; PWR9LE-NEXT: vextsb2d v2, v2 1466; PWR9LE-NEXT: vaddudm v2, v2, v7 1467; PWR9LE-NEXT: vaddudm v2, v1, v2 1468; PWR9LE-NEXT: vaddudm v2, v2, v3 1469; PWR9LE-NEXT: xxswapd v3, v2 1470; PWR9LE-NEXT: vaddudm v2, v2, v3 1471; PWR9LE-NEXT: mfvsrld r3, v2 1472; PWR9LE-NEXT: blr 1473; 1474; PWR9BE-LABEL: v16i8tov16i64_sign: 1475; PWR9BE: # %bb.0: # %entry 1476; PWR9BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha 1477; PWR9BE-NEXT: addi r3, r3, .LCPI23_0@toc@l 1478; PWR9BE-NEXT: lxv v3, 0(r3) 1479; PWR9BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha 1480; PWR9BE-NEXT: addi r3, r3, .LCPI23_1@toc@l 1481; PWR9BE-NEXT: lxv v4, 0(r3) 1482; PWR9BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha 1483; PWR9BE-NEXT: vperm v3, v2, v2, v3 1484; PWR9BE-NEXT: addi r3, r3, .LCPI23_2@toc@l 1485; PWR9BE-NEXT: lxv v5, 0(r3) 1486; PWR9BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha 1487; PWR9BE-NEXT: vextsb2d v3, v3 1488; PWR9BE-NEXT: vperm v4, v2, v2, v4 1489; PWR9BE-NEXT: addi r3, r3, .LCPI23_3@toc@l 1490; PWR9BE-NEXT: lxv v0, 0(r3) 1491; PWR9BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha 1492; PWR9BE-NEXT: vextsb2d v4, v4 1493; PWR9BE-NEXT: vperm v5, v2, v2, v5 1494; PWR9BE-NEXT: addi r3, r3, .LCPI23_4@toc@l 1495; PWR9BE-NEXT: vaddudm v3, v4, v3 1496; PWR9BE-NEXT: lxv v1, 0(r3) 1497; PWR9BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha 1498; PWR9BE-NEXT: vextsb2d v5, v5 1499; PWR9BE-NEXT: vperm v0, v2, v2, v0 1500; PWR9BE-NEXT: addi r3, r3, .LCPI23_5@toc@l 1501; PWR9BE-NEXT: lxv v6, 0(r3) 1502; PWR9BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha 1503; PWR9BE-NEXT: vperm v1, v2, v2, v1 1504; PWR9BE-NEXT: vextsb2d v0, v0 1505; PWR9BE-NEXT: addi r3, r3, .LCPI23_6@toc@l 1506; PWR9BE-NEXT: vaddudm v5, v0, v5 1507; PWR9BE-NEXT: lxv v7, 0(r3) 1508; PWR9BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha 1509; PWR9BE-NEXT: vperm v6, v2, v2, v6 1510; PWR9BE-NEXT: vextsb2d v1, v1 1511; PWR9BE-NEXT: vaddudm v3, v3, v5 1512; PWR9BE-NEXT: addi r3, r3, .LCPI23_7@toc@l 1513; PWR9BE-NEXT: lxv v8, 0(r3) 1514; PWR9BE-NEXT: vextsb2d v6, v6 1515; PWR9BE-NEXT: vperm v7, v2, v2, v7 1516; PWR9BE-NEXT: vaddudm v1, v6, v1 1517; PWR9BE-NEXT: vextsb2d v7, v7 1518; PWR9BE-NEXT: vperm v2, v2, v2, v8 1519; PWR9BE-NEXT: vextsb2d v2, v2 1520; PWR9BE-NEXT: vaddudm v2, v2, v7 1521; PWR9BE-NEXT: vaddudm v2, v1, v2 1522; PWR9BE-NEXT: vaddudm v2, v2, v3 1523; PWR9BE-NEXT: xxswapd v3, v2 1524; PWR9BE-NEXT: vaddudm v2, v2, v3 1525; PWR9BE-NEXT: mfvsrd r3, v2 1526; PWR9BE-NEXT: blr 1527; 1528; PWR10LE-LABEL: v16i8tov16i64_sign: 1529; PWR10LE: # %bb.0: # %entry 1530; PWR10LE-NEXT: plxv v3, .LCPI23_0@PCREL(0), 1 1531; PWR10LE-NEXT: plxv v4, .LCPI23_1@PCREL(0), 1 1532; PWR10LE-NEXT: vperm v3, v2, v2, v3 1533; PWR10LE-NEXT: plxv v5, .LCPI23_2@PCREL(0), 1 1534; PWR10LE-NEXT: plxv v0, .LCPI23_3@PCREL(0), 1 1535; PWR10LE-NEXT: plxv v1, .LCPI23_4@PCREL(0), 1 1536; PWR10LE-NEXT: plxv v6, .LCPI23_5@PCREL(0), 1 1537; PWR10LE-NEXT: plxv v7, .LCPI23_6@PCREL(0), 1 1538; PWR10LE-NEXT: plxv v8, .LCPI23_7@PCREL(0), 1 1539; PWR10LE-NEXT: vperm v4, v2, v2, v4 1540; PWR10LE-NEXT: vperm v5, v2, v2, v5 1541; PWR10LE-NEXT: vperm v0, v2, v2, v0 1542; PWR10LE-NEXT: vperm v1, v2, v2, v1 1543; PWR10LE-NEXT: vperm v6, v2, v2, v6 1544; PWR10LE-NEXT: vperm v7, v2, v2, v7 1545; PWR10LE-NEXT: vperm v2, v2, v2, v8 1546; PWR10LE-NEXT: vextsb2d v5, v5 1547; PWR10LE-NEXT: vextsb2d v0, v0 1548; PWR10LE-NEXT: vextsb2d v7, v7 1549; PWR10LE-NEXT: vextsb2d v2, v2 1550; PWR10LE-NEXT: vextsb2d v3, v3 1551; PWR10LE-NEXT: vextsb2d v4, v4 1552; PWR10LE-NEXT: vextsb2d v1, v1 1553; PWR10LE-NEXT: vextsb2d v6, v6 1554; PWR10LE-NEXT: vaddudm v2, v2, v7 1555; PWR10LE-NEXT: vaddudm v5, v0, v5 1556; PWR10LE-NEXT: vaddudm v3, v4, v3 1557; PWR10LE-NEXT: vaddudm v3, v3, v5 1558; PWR10LE-NEXT: vaddudm v4, v6, v1 1559; PWR10LE-NEXT: vaddudm v2, v4, v2 1560; PWR10LE-NEXT: vaddudm v2, v2, v3 1561; PWR10LE-NEXT: xxswapd v3, v2 1562; PWR10LE-NEXT: vaddudm v2, v2, v3 1563; PWR10LE-NEXT: mfvsrld r3, v2 1564; PWR10LE-NEXT: blr 1565; 1566; PWR10BE-LABEL: v16i8tov16i64_sign: 1567; PWR10BE: # %bb.0: # %entry 1568; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha 1569; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l 1570; PWR10BE-NEXT: lxv v3, 0(r3) 1571; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha 1572; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l 1573; PWR10BE-NEXT: lxv v4, 0(r3) 1574; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha 1575; PWR10BE-NEXT: vperm v3, v2, v2, v3 1576; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l 1577; PWR10BE-NEXT: vextsb2d v3, v3 1578; PWR10BE-NEXT: lxv v5, 0(r3) 1579; PWR10BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha 1580; PWR10BE-NEXT: vperm v4, v2, v2, v4 1581; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l 1582; PWR10BE-NEXT: vextsb2d v4, v4 1583; PWR10BE-NEXT: lxv v0, 0(r3) 1584; PWR10BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha 1585; PWR10BE-NEXT: vperm v5, v2, v2, v5 1586; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l 1587; PWR10BE-NEXT: vextsb2d v5, v5 1588; PWR10BE-NEXT: lxv v1, 0(r3) 1589; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha 1590; PWR10BE-NEXT: vperm v0, v2, v2, v0 1591; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l 1592; PWR10BE-NEXT: vextsb2d v0, v0 1593; PWR10BE-NEXT: lxv v6, 0(r3) 1594; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha 1595; PWR10BE-NEXT: vperm v1, v2, v2, v1 1596; PWR10BE-NEXT: vaddudm v5, v0, v5 1597; PWR10BE-NEXT: vaddudm v3, v4, v3 1598; PWR10BE-NEXT: vaddudm v3, v3, v5 1599; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l 1600; PWR10BE-NEXT: vextsb2d v1, v1 1601; PWR10BE-NEXT: lxv v7, 0(r3) 1602; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha 1603; PWR10BE-NEXT: vperm v6, v2, v2, v6 1604; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l 1605; PWR10BE-NEXT: vextsb2d v6, v6 1606; PWR10BE-NEXT: lxv v8, 0(r3) 1607; PWR10BE-NEXT: vperm v7, v2, v2, v7 1608; PWR10BE-NEXT: vextsb2d v7, v7 1609; PWR10BE-NEXT: vperm v2, v2, v2, v8 1610; PWR10BE-NEXT: vextsb2d v2, v2 1611; PWR10BE-NEXT: vaddudm v2, v2, v7 1612; PWR10BE-NEXT: vaddudm v4, v6, v1 1613; PWR10BE-NEXT: vaddudm v2, v4, v2 1614; PWR10BE-NEXT: vaddudm v2, v2, v3 1615; PWR10BE-NEXT: xxswapd v3, v2 1616; PWR10BE-NEXT: vaddudm v2, v2, v3 1617; PWR10BE-NEXT: mfvsrd r3, v2 1618; PWR10BE-NEXT: blr 1619entry: 1620 %0 = sext <16 x i8> %a to <16 x i64> 1621 %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0) 1622 ret i64 %1 1623} 1624 1625define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 { 1626; PWR9LE-LABEL: v16i8tov16i64_zero: 1627; PWR9LE: # %bb.0: # %entry 1628; PWR9LE-NEXT: addis r3, r2, .LCPI24_0@toc@ha 1629; PWR9LE-NEXT: xxlxor v4, v4, v4 1630; PWR9LE-NEXT: addi r3, r3, .LCPI24_0@toc@l 1631; PWR9LE-NEXT: lxv v3, 0(r3) 1632; PWR9LE-NEXT: addis r3, r2, .LCPI24_1@toc@ha 1633; PWR9LE-NEXT: addi r3, r3, .LCPI24_1@toc@l 1634; PWR9LE-NEXT: lxv v5, 0(r3) 1635; PWR9LE-NEXT: addis r3, r2, .LCPI24_2@toc@ha 1636; PWR9LE-NEXT: vperm v3, v4, v2, v3 1637; PWR9LE-NEXT: addi r3, r3, .LCPI24_2@toc@l 1638; PWR9LE-NEXT: lxv v0, 0(r3) 1639; PWR9LE-NEXT: addis r3, r2, .LCPI24_3@toc@ha 1640; PWR9LE-NEXT: vperm v5, v4, v2, v5 1641; PWR9LE-NEXT: addi r3, r3, .LCPI24_3@toc@l 1642; PWR9LE-NEXT: lxv v1, 0(r3) 1643; PWR9LE-NEXT: addis r3, r2, .LCPI24_4@toc@ha 1644; PWR9LE-NEXT: vaddudm v3, v5, v3 1645; PWR9LE-NEXT: vperm v0, v4, v2, v0 1646; PWR9LE-NEXT: addi r3, r3, .LCPI24_4@toc@l 1647; PWR9LE-NEXT: lxv v6, 0(r3) 1648; PWR9LE-NEXT: addis r3, r2, .LCPI24_5@toc@ha 1649; PWR9LE-NEXT: vperm v1, v4, v2, v1 1650; PWR9LE-NEXT: addi r3, r3, .LCPI24_5@toc@l 1651; PWR9LE-NEXT: lxv v7, 0(r3) 1652; PWR9LE-NEXT: addis r3, r2, .LCPI24_6@toc@ha 1653; PWR9LE-NEXT: vaddudm v0, v1, v0 1654; PWR9LE-NEXT: vperm v6, v4, v2, v6 1655; PWR9LE-NEXT: addi r3, r3, .LCPI24_6@toc@l 1656; PWR9LE-NEXT: lxv v8, 0(r3) 1657; PWR9LE-NEXT: addis r3, r2, .LCPI24_7@toc@ha 1658; PWR9LE-NEXT: vaddudm v3, v3, v0 1659; PWR9LE-NEXT: vperm v7, v4, v2, v7 1660; PWR9LE-NEXT: addi r3, r3, .LCPI24_7@toc@l 1661; PWR9LE-NEXT: lxv v9, 0(r3) 1662; PWR9LE-NEXT: vperm v8, v4, v2, v8 1663; PWR9LE-NEXT: vperm v2, v4, v2, v9 1664; PWR9LE-NEXT: vaddudm v4, v7, v6 1665; PWR9LE-NEXT: vaddudm v2, v2, v8 1666; PWR9LE-NEXT: vaddudm v2, v4, v2 1667; PWR9LE-NEXT: vaddudm v2, v2, v3 1668; PWR9LE-NEXT: xxswapd v3, v2 1669; PWR9LE-NEXT: vaddudm v2, v2, v3 1670; PWR9LE-NEXT: mfvsrld r3, v2 1671; PWR9LE-NEXT: blr 1672; 1673; PWR9BE-LABEL: v16i8tov16i64_zero: 1674; PWR9BE: # %bb.0: # %entry 1675; PWR9BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha 1676; PWR9BE-NEXT: xxlxor v4, v4, v4 1677; PWR9BE-NEXT: addi r3, r3, .LCPI24_0@toc@l 1678; PWR9BE-NEXT: lxv v3, 0(r3) 1679; PWR9BE-NEXT: addis r3, r2, .LCPI24_1@toc@ha 1680; PWR9BE-NEXT: addi r3, r3, .LCPI24_1@toc@l 1681; PWR9BE-NEXT: lxv v5, 0(r3) 1682; PWR9BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha 1683; PWR9BE-NEXT: vperm v3, v4, v2, v3 1684; PWR9BE-NEXT: addi r3, r3, .LCPI24_2@toc@l 1685; PWR9BE-NEXT: lxv v0, 0(r3) 1686; PWR9BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha 1687; PWR9BE-NEXT: vperm v5, v4, v2, v5 1688; PWR9BE-NEXT: addi r3, r3, .LCPI24_3@toc@l 1689; PWR9BE-NEXT: lxv v1, 0(r3) 1690; PWR9BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha 1691; PWR9BE-NEXT: vaddudm v3, v5, v3 1692; PWR9BE-NEXT: vperm v0, v4, v2, v0 1693; PWR9BE-NEXT: addi r3, r3, .LCPI24_4@toc@l 1694; PWR9BE-NEXT: lxv v6, 0(r3) 1695; PWR9BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha 1696; PWR9BE-NEXT: vperm v1, v4, v2, v1 1697; PWR9BE-NEXT: addi r3, r3, .LCPI24_5@toc@l 1698; PWR9BE-NEXT: lxv v7, 0(r3) 1699; PWR9BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha 1700; PWR9BE-NEXT: vaddudm v0, v1, v0 1701; PWR9BE-NEXT: vperm v6, v4, v2, v6 1702; PWR9BE-NEXT: addi r3, r3, .LCPI24_6@toc@l 1703; PWR9BE-NEXT: lxv v8, 0(r3) 1704; PWR9BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha 1705; PWR9BE-NEXT: vaddudm v3, v3, v0 1706; PWR9BE-NEXT: vperm v7, v4, v2, v7 1707; PWR9BE-NEXT: addi r3, r3, .LCPI24_7@toc@l 1708; PWR9BE-NEXT: lxv v9, 0(r3) 1709; PWR9BE-NEXT: vperm v8, v4, v2, v8 1710; PWR9BE-NEXT: vperm v2, v4, v2, v9 1711; PWR9BE-NEXT: vaddudm v4, v7, v6 1712; PWR9BE-NEXT: vaddudm v2, v2, v8 1713; PWR9BE-NEXT: vaddudm v2, v4, v2 1714; PWR9BE-NEXT: vaddudm v2, v2, v3 1715; PWR9BE-NEXT: xxswapd v3, v2 1716; PWR9BE-NEXT: vaddudm v2, v2, v3 1717; PWR9BE-NEXT: mfvsrd r3, v2 1718; PWR9BE-NEXT: blr 1719; 1720; PWR10LE-LABEL: v16i8tov16i64_zero: 1721; PWR10LE: # %bb.0: # %entry 1722; PWR10LE-NEXT: plxv v3, .LCPI24_0@PCREL(0), 1 1723; PWR10LE-NEXT: plxv v5, .LCPI24_1@PCREL(0), 1 1724; PWR10LE-NEXT: xxlxor v4, v4, v4 1725; PWR10LE-NEXT: vperm v3, v4, v2, v3 1726; PWR10LE-NEXT: plxv v0, .LCPI24_2@PCREL(0), 1 1727; PWR10LE-NEXT: plxv v1, .LCPI24_3@PCREL(0), 1 1728; PWR10LE-NEXT: plxv v6, .LCPI24_4@PCREL(0), 1 1729; PWR10LE-NEXT: plxv v7, .LCPI24_5@PCREL(0), 1 1730; PWR10LE-NEXT: plxv v8, .LCPI24_6@PCREL(0), 1 1731; PWR10LE-NEXT: plxv v9, .LCPI24_7@PCREL(0), 1 1732; PWR10LE-NEXT: vperm v5, v4, v2, v5 1733; PWR10LE-NEXT: vperm v0, v4, v2, v0 1734; PWR10LE-NEXT: vperm v1, v4, v2, v1 1735; PWR10LE-NEXT: vperm v6, v4, v2, v6 1736; PWR10LE-NEXT: vperm v7, v4, v2, v7 1737; PWR10LE-NEXT: vperm v8, v4, v2, v8 1738; PWR10LE-NEXT: vperm v2, v4, v2, v9 1739; PWR10LE-NEXT: vaddudm v2, v2, v8 1740; PWR10LE-NEXT: vaddudm v4, v1, v0 1741; PWR10LE-NEXT: vaddudm v3, v5, v3 1742; PWR10LE-NEXT: vaddudm v3, v3, v4 1743; PWR10LE-NEXT: vaddudm v4, v7, v6 1744; PWR10LE-NEXT: vaddudm v2, v4, v2 1745; PWR10LE-NEXT: vaddudm v2, v2, v3 1746; PWR10LE-NEXT: xxswapd v3, v2 1747; PWR10LE-NEXT: vaddudm v2, v2, v3 1748; PWR10LE-NEXT: mfvsrld r3, v2 1749; PWR10LE-NEXT: blr 1750; 1751; PWR10BE-LABEL: v16i8tov16i64_zero: 1752; PWR10BE: # %bb.0: # %entry 1753; PWR10BE-NEXT: addis r3, r2, .LCPI24_0@toc@ha 1754; PWR10BE-NEXT: xxlxor v4, v4, v4 1755; PWR10BE-NEXT: addi r3, r3, .LCPI24_0@toc@l 1756; PWR10BE-NEXT: lxv v3, 0(r3) 1757; PWR10BE-NEXT: addis r3, r2, .LCPI24_1@toc@ha 1758; PWR10BE-NEXT: addi r3, r3, .LCPI24_1@toc@l 1759; PWR10BE-NEXT: lxv v5, 0(r3) 1760; PWR10BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha 1761; PWR10BE-NEXT: vperm v3, v4, v2, v3 1762; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l 1763; PWR10BE-NEXT: lxv v0, 0(r3) 1764; PWR10BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha 1765; PWR10BE-NEXT: vperm v5, v4, v2, v5 1766; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l 1767; PWR10BE-NEXT: lxv v1, 0(r3) 1768; PWR10BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha 1769; PWR10BE-NEXT: vperm v0, v4, v2, v0 1770; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l 1771; PWR10BE-NEXT: lxv v6, 0(r3) 1772; PWR10BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha 1773; PWR10BE-NEXT: vperm v1, v4, v2, v1 1774; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l 1775; PWR10BE-NEXT: lxv v7, 0(r3) 1776; PWR10BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha 1777; PWR10BE-NEXT: vperm v6, v4, v2, v6 1778; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l 1779; PWR10BE-NEXT: lxv v8, 0(r3) 1780; PWR10BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha 1781; PWR10BE-NEXT: vperm v7, v4, v2, v7 1782; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l 1783; PWR10BE-NEXT: lxv v9, 0(r3) 1784; PWR10BE-NEXT: vperm v8, v4, v2, v8 1785; PWR10BE-NEXT: vperm v2, v4, v2, v9 1786; PWR10BE-NEXT: vaddudm v4, v1, v0 1787; PWR10BE-NEXT: vaddudm v3, v5, v3 1788; PWR10BE-NEXT: vaddudm v3, v3, v4 1789; PWR10BE-NEXT: vaddudm v2, v2, v8 1790; PWR10BE-NEXT: vaddudm v4, v7, v6 1791; PWR10BE-NEXT: vaddudm v2, v4, v2 1792; PWR10BE-NEXT: vaddudm v2, v2, v3 1793; PWR10BE-NEXT: xxswapd v3, v2 1794; PWR10BE-NEXT: vaddudm v2, v2, v3 1795; PWR10BE-NEXT: mfvsrd r3, v2 1796; PWR10BE-NEXT: blr 1797entry: 1798 %0 = zext <16 x i8> %a to <16 x i64> 1799 %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0) 1800 ret i64 %1 1801} 1802 1803declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #0 1804declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #0 1805declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #0 1806declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) #0 1807 1808attributes #0 = { nounwind } 1809