; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu \
; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE
; RUN: opt --passes='sroa,loop-vectorize,loop-unroll,instcombine' -S \
; RUN:     -vectorizer-maximize-bandwidth --mtriple=powerpc64le-- -mcpu=pwr10 < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-OPT

; The data layout below gives 256-bit and 512-bit vectors an ABI and preferred
; alignment equal to their size (v256:256:256, v512:512:512). The functions in
; this file exercise stack objects and vector stores under that layout.
target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"

; A 32-byte <8 x i32> stack object with 32-byte alignment: the vector constant
; is stored to the slot, the slot is passed to @test, and element 0 is reloaded
; and returned. The checks cover the aligned frame setup and the stores that
; materialize the constant for both llc configurations.
define dso_local signext i32 @test_32byte_vector() nounwind {
; CHECK-LE-LABEL: test_32byte_vector:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    std r30, -16(r1)
; CHECK-LE-NEXT:    mr r30, r1
; CHECK-LE-NEXT:    std r0, 16(r1)
; CHECK-LE-NEXT:    clrldi r0, r1, 59
; CHECK-LE-NEXT:    subfic r0, r0, -96
; CHECK-LE-NEXT:    stdux r1, r1, r0
; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    addi r3, r1, 48
; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_1@toc@l
; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    addi r3, r1, 32
; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    bl test
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    lwa r3, 32(r1)
; CHECK-LE-NEXT:    mr r1, r30
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    ld r30, -16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_32byte_vector:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    clrldi r0, r1, 59
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    subfic r0, r0, -192
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    lis r3, -8192
; CHECK-BE-NEXT:    li r4, 5
; CHECK-BE-NEXT:    lis r5, -16384
; CHECK-BE-NEXT:    lis r6, -32768
; CHECK-BE-NEXT:    ori r3, r3, 1
; CHECK-BE-NEXT:    rldic r4, r4, 32, 29
; CHECK-BE-NEXT:    ori r5, r5, 1
; CHECK-BE-NEXT:    ori r6, r6, 1
; CHECK-BE-NEXT:    rldic r3, r3, 3, 29
; CHECK-BE-NEXT:    ori r4, r4, 6
; CHECK-BE-NEXT:    rldic r5, r5, 2, 30
; CHECK-BE-NEXT:    rldic r6, r6, 1, 31
; CHECK-BE-NEXT:    std r3, 152(r1)
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    std r4, 144(r1)
; CHECK-BE-NEXT:    std r5, 136(r1)
; CHECK-BE-NEXT:    std r6, 128(r1)
; CHECK-BE-NEXT:    bl test
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    lwa r3, 128(r1)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %a = alloca <8 x i32>, align 32
  call void @llvm.lifetime.start.p0(i64 32, ptr %a)
  store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, ptr %a, align 32
  call void @test(ptr %a)
  %0 = load <8 x i32>, ptr %a, align 32
  %vecext = extractelement <8 x i32> %0, i32 0
  call void @llvm.lifetime.end.p0(i64 32, ptr %a)
  ret i32 %vecext
}

; Same shape as the function above, but with a 16-byte <4 x i32> object that is
; explicitly over-aligned to 32 bytes on the alloca, store and load. The slot is
; passed to @test1 and element 0 is returned.
define dso_local signext i32 @test_32byte_aligned_vector() nounwind {
; CHECK-LE-LABEL: test_32byte_aligned_vector:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    std r30, -16(r1)
; CHECK-LE-NEXT:    mr r30, r1
; CHECK-LE-NEXT:    std r0, 16(r1)
; CHECK-LE-NEXT:    clrldi r0, r1, 59
; CHECK-LE-NEXT:    subfic r0, r0, -64
; CHECK-LE-NEXT:    stdux r1, r1, r0
; CHECK-LE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    addi r3, r1, 32
; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    bl test1
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    lwa r3, 32(r1)
; CHECK-LE-NEXT:    mr r1, r30
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    ld r30, -16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_32byte_aligned_vector:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    clrldi r0, r1, 59
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    subfic r0, r0, -160
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    lis r3, -16384
; CHECK-BE-NEXT:    lis r4, -32768
; CHECK-BE-NEXT:    ori r3, r3, 1
; CHECK-BE-NEXT:    ori r4, r4, 1
; CHECK-BE-NEXT:    rldic r3, r3, 2, 30
; CHECK-BE-NEXT:    rldic r4, r4, 1, 31
; CHECK-BE-NEXT:    std r3, 136(r1)
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    std r4, 128(r1)
; CHECK-BE-NEXT:    bl test1
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    lwa r3, 128(r1)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %a = alloca <4 x i32>, align 32
  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %a, align 32
  call void @test1(ptr %a)
  %0 = load <4 x i32>, ptr %a, align 32
  %vecext = extractelement <4 x i32> %0, i32 0
  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
  ret i32 %vecext
}


@Arr1 = dso_local global [64 x i8] zeroinitializer, align 1

; Copies the 64 bytes of @Arr1, zero-extended to i16, into a local 2-byte-aligned
; [64 x i16] array and hands that array to @test_arr. The opt pipeline run
; verifies that the <16 x i16> store produced for this loop still carries
; "align 2"; the stored value and destination pointer are matched by two
; independent captures (any SSA name is accepted for each).
define dso_local void @test_Array() nounwind {
; CHECK-OPT-LABEL: @test_Array(
; CHECK-OPT-NEXT: entry:
; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2
; CHECK-OPT: store <16 x i16> [[VEC:%.*]], ptr [[DST:%.*]], align 2
; CHECK-LE-LABEL: test_Array:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    stdu r1, -176(r1)
; CHECK-LE-NEXT:    li r3, 0
; CHECK-LE-NEXT:    addis r4, r2, Arr1@toc@ha
; CHECK-LE-NEXT:    li r6, 65
; CHECK-LE-NEXT:    std r0, 192(r1)
; CHECK-LE-NEXT:    addi r5, r1, 46
; CHECK-LE-NEXT:    stw r3, 44(r1)
; CHECK-LE-NEXT:    addi r4, r4, Arr1@toc@l
; CHECK-LE-NEXT:    mtctr r6
; CHECK-LE-NEXT:    addi r4, r4, -1
; CHECK-LE-NEXT:    bdz .LBB2_2
; CHECK-LE-NEXT:    .p2align 5
; CHECK-LE-NEXT:  .LBB2_1: # %for.body
; CHECK-LE-NEXT:    #
; CHECK-LE-NEXT:    lbz r6, 1(r4)
; CHECK-LE-NEXT:    addi r4, r4, 1
; CHECK-LE-NEXT:    addi r3, r3, 1
; CHECK-LE-NEXT:    sth r6, 2(r5)
; CHECK-LE-NEXT:    addi r5, r5, 2
; CHECK-LE-NEXT:    bdnz .LBB2_1
; CHECK-LE-NEXT:  .LBB2_2: # %for.cond.cleanup
; CHECK-LE-NEXT:    addi r3, r1, 48
; CHECK-LE-NEXT:    bl test_arr
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    addi r1, r1, 176
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_Array:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    stdu r1, -256(r1)
; CHECK-BE-NEXT:    addis r5, r2, Arr1@toc@ha
; CHECK-BE-NEXT:    li r3, 0
; CHECK-BE-NEXT:    addi r5, r5, Arr1@toc@l
; CHECK-BE-NEXT:    std r0, 272(r1)
; CHECK-BE-NEXT:    addi r4, r1, 126
; CHECK-BE-NEXT:    li r6, 65
; CHECK-BE-NEXT:    stw r3, 124(r1)
; CHECK-BE-NEXT:    addi r5, r5, -1
; CHECK-BE-NEXT:    mtctr r6
; CHECK-BE-NEXT:    bdz .LBB2_2
; CHECK-BE-NEXT:  .LBB2_1: # %for.body
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    lbz r6, 1(r5)
; CHECK-BE-NEXT:    addi r5, r5, 1
; CHECK-BE-NEXT:    addi r3, r3, 1
; CHECK-BE-NEXT:    sth r6, 2(r4)
; CHECK-BE-NEXT:    addi r4, r4, 2
; CHECK-BE-NEXT:    bdnz .LBB2_1
; CHECK-BE-NEXT:  .LBB2_2: # %for.cond.cleanup
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    bl test_arr
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    addi r1, r1, 256
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %Arr2 = alloca [64 x i16], align 2
  %i = alloca i32, align 4
  call void @llvm.lifetime.start.p0(i64 128, ptr %Arr2)
  call void @llvm.lifetime.start.p0(i64 4, ptr %i)
  store i32 0, ptr %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4
  %cmp = icmp slt i32 %0, 64
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @llvm.lifetime.end.p0(i64 4, ptr %i)
  br label %for.end

for.body:                                         ; preds = %for.cond
  %1 = load i32, ptr %i, align 4
  %idxprom = sext i32 %1 to i64
  %arrayidx = getelementptr inbounds [64 x i8], ptr @Arr1, i64 0, i64 %idxprom
  %2 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %2 to i16
  %3 = load i32, ptr %i, align 4
  %idxprom1 = sext i32 %3 to i64
  %arrayidx2 = getelementptr inbounds [64 x i16], ptr %Arr2, i64 0, i64 %idxprom1
  store i16 %conv, ptr %arrayidx2, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %4 = load i32, ptr %i, align 4
  %inc = add nsw i32 %4, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  call void @test_arr(ptr %Arr2)
  call void @llvm.lifetime.end.p0(i64 128, ptr %Arr2)
  ret void
}

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) nounwind

declare void @test(ptr) nounwind
declare void @test1(ptr) nounwind
declare void @test_arr(ptr)

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) nounwind