; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE

; Integer multiply reductions over i32 vectors of 2, 4, 8 and 16 elements.
; Every function below is compiled by each of the four llc invocations above
; (pwr9 and pwr10, little- and big-endian) and the emitted assembly is matched
; against the per-configuration assertions that precede its body.

; Reduction intrinsics exercised by this file.
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) #0

; <2 x i32>: the expected code is one word splat, one vmuluwm and a word
; extract (vextuwrx on little-endian, vextuwlx on big-endian).
define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %prod = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a)
  ret i32 %prod
}

; <4 x i32>: the expected code adds a doubleword swap and a second vmuluwm
; ahead of the splat/multiply/extract sequence.
define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %prod = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
  ret i32 %prod
}

; <8 x i32>: the argument arrives in v2 and v3; the expected code multiplies
; the two registers together first and then follows the four-element pattern.
define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %prod = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
  ret i32 %prod
}

; <16 x i32>: the argument arrives in v2-v5; the expected code combines the
; four registers pairwise with vmuluwm before the four-element pattern.
define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vmuluwm v3, v3, v5
; PWR9LE-NEXT:    vmuluwm v2, v2, v4
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vmuluwm v3, v3, v5
; PWR9BE-NEXT:    vmuluwm v2, v2, v4
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vmuluwm v3, v3, v5
; PWR10LE-NEXT:    vmuluwm v2, v2, v4
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vmuluwm v3, v3, v5
; PWR10BE-NEXT:    vmuluwm v2, v2, v4
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %prod = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a)
  ret i32 %prod
}

; Attribute group shared by the test functions and the intrinsic declarations.
attributes #0 = { nounwind }