; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-postra-bias-addi=false < %s |\
; RUN:   FileCheck -check-prefix=CHECK-P9-NO-HEURISTIC %s

; Codegen test for an unrolled vector scaling loop on powerpc64le / pwr9.
; llc is invoked twice: once with default options and once with
; -ppc-postra-bias-addi=false. Each invocation is matched against its own
; check prefix.
; NOTE(review): judging by the option name, this presumably exercises a
; post-RA scheduling heuristic for addi -- confirm against the backend.
; NOTE(review): as recorded in this file, the two expected instruction
; sequences are identical.

; Packed struct: 16 bytes, then a double (field 1, at byte offset 16), then
; 152 bytes.
%_type_of_scalars = type <{ [16 x i8], double, [152 x i8] }>
; Both element types are a packed struct wrapping a single double.
%_elem_type_of_x = type <{ double }>
%_elem_type_of_a = type <{ double }>

@scalars = common dso_local local_unnamed_addr global %_type_of_scalars zeroinitializer, align 16

; @test loads <4 x double> chunks through %.a, multiplies each by a splat of
; the double held in field 1 of @scalars, and stores some of the products
; through %.x. The loop advances the index by 32 per iteration and exits when
; the index equals the i64 loaded through %.n masked with -32.
define dso_local void @test(ptr noalias %.x, ptr %.a, ptr noalias %.n) {
; CHECK-P9-LABEL: test:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: ld 5, 0(5)
; CHECK-P9-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NEXT: addi 6, 6, 16
; CHECK-P9-NEXT: addi 5, 5, -32
; CHECK-P9-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NEXT: addi 5, 5, 1
; CHECK-P9-NEXT: mtctr 5
; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: lxv 1, 16(4)
; CHECK-P9-NEXT: lxv 2, 0(4)
; CHECK-P9-NEXT: lxv 3, 48(4)
; CHECK-P9-NEXT: lxv 4, 32(4)
; CHECK-P9-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NEXT: lxv 5, 240(4)
; CHECK-P9-NEXT: lxv 6, 224(4)
; CHECK-P9-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NEXT: addi 4, 4, 256
; CHECK-P9-NEXT: stxv 1, 16(3)
; CHECK-P9-NEXT: stxv 2, 0(3)
; CHECK-P9-NEXT: stxv 3, 48(3)
; CHECK-P9-NEXT: stxv 4, 32(3)
; CHECK-P9-NEXT: stxv 5, 240(3)
; CHECK-P9-NEXT: stxv 6, 224(3)
; CHECK-P9-NEXT: addi 3, 3, 256
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %return.block
; CHECK-P9-NEXT: blr
;
; CHECK-P9-NO-HEURISTIC-LABEL: test:
; CHECK-P9-NO-HEURISTIC: # %bb.0: # %entry
; CHECK-P9-NO-HEURISTIC-NEXT: ld 5, 0(5)
; CHECK-P9-NO-HEURISTIC-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NO-HEURISTIC-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, 16
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, -32
; CHECK-P9-NO-HEURISTIC-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NO-HEURISTIC-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, 1
; CHECK-P9-NO-HEURISTIC-NEXT: mtctr 5
; CHECK-P9-NO-HEURISTIC-NEXT: .p2align 4
; CHECK-P9-NO-HEURISTIC-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NO-HEURISTIC-NEXT: #
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 1, 16(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 2, 0(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 3, 48(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 4, 32(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 5, 240(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 6, 224(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NO-HEURISTIC-NEXT: addi 4, 4, 256
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 1, 16(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 2, 0(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 3, 48(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 4, 32(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 5, 240(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 6, 224(3)
; CHECK-P9-NO-HEURISTIC-NEXT: addi 3, 3, 256
; CHECK-P9-NO-HEURISTIC-NEXT: bdnz .LBB0_1
; CHECK-P9-NO-HEURISTIC-NEXT: # %bb.2: # %return.block
; CHECK-P9-NO-HEURISTIC-NEXT: blr
entry:
  ; Base pointers biased by one element below %.x and %.a; the loop index is
  ; offset by +1 to compensate (see %offset.idx).
  %x_rvo_based_addr_3 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1
  %a_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_a], ptr %.a, i64 0, i64 -1
  %_val_n_ = load i64, ptr %.n, align 8
  ; Scale factor: the double in field 1 of @scalars.
  %_val_c1_ = load double, ptr getelementptr inbounds (%_type_of_scalars, ptr @scalars, i64 0, i32 1), align 16
  ; Clear the low five bits: the loop bound is %_val_n_ rounded down to a
  ; multiple of 32.
  %n.vec = and i64 %_val_n_, -32
  ; Broadcast the scale factor into all four lanes.
  %broadcast.splatinsert26 = insertelement <4 x double> undef, double %_val_c1_, i32 0
  %broadcast.splat27 = shufflevector <4 x double> %broadcast.splatinsert26, <4 x double> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  ; %index starts at 0 and steps by 32, so its low bit is clear and the
  ; disjoint or is equivalent to adding 1, cancelling the -1 bias of the bases.
  %offset.idx = or disjoint i64 %index, 1
  %0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
  %1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
  ; Eight <4 x double> loads at offsets 0, 4, ..., 28 doubles from %1.
  %wide.load = load <4 x double>, ptr %1, align 8
  %2 = getelementptr double, ptr %1, i64 4
  %wide.load19 = load <4 x double>, ptr %2, align 8
  %3 = getelementptr double, ptr %1, i64 8
  %wide.load20 = load <4 x double>, ptr %3, align 8
  %4 = getelementptr double, ptr %1, i64 12
  %wide.load21 = load <4 x double>, ptr %4, align 8
  %5 = getelementptr double, ptr %1, i64 16
  %wide.load22 = load <4 x double>, ptr %5, align 8
  %6 = getelementptr double, ptr %1, i64 20
  %wide.load23 = load <4 x double>, ptr %6, align 8
  %7 = getelementptr double, ptr %1, i64 24
  %wide.load24 = load <4 x double>, ptr %7, align 8
  %8 = getelementptr double, ptr %1, i64 28
  %wide.load25 = load <4 x double>, ptr %8, align 8
  ; Multiply every loaded chunk by the splatted scale factor.
  %9 = fmul fast <4 x double> %wide.load, %broadcast.splat27
  %10 = fmul fast <4 x double> %wide.load19, %broadcast.splat27
  %11 = fmul fast <4 x double> %wide.load20, %broadcast.splat27
  %12 = fmul fast <4 x double> %wide.load21, %broadcast.splat27
  %13 = fmul fast <4 x double> %wide.load22, %broadcast.splat27
  %14 = fmul fast <4 x double> %wide.load23, %broadcast.splat27
  %15 = fmul fast <4 x double> %wide.load24, %broadcast.splat27
  %16 = fmul fast <4 x double> %wide.load25, %broadcast.splat27
  ; Only %9, %10 and %16 are stored (offsets 0, 4 and 28 doubles from %0).
  ; %11-%15 and the addresses %18-%22 have no users.
  ; NOTE(review): presumably a deliberately reduced test case -- the expected
  ; output above has six lxv/stxv pairs, matching three stored vectors.
  store <4 x double> %9, ptr %0, align 8
  %17 = getelementptr double, ptr %0, i64 4
  store <4 x double> %10, ptr %17, align 8
  %18 = getelementptr double, ptr %0, i64 8
  %19 = getelementptr double, ptr %0, i64 12
  %20 = getelementptr double, ptr %0, i64 16
  %21 = getelementptr double, ptr %0, i64 20
  %22 = getelementptr double, ptr %0, i64 24
  %23 = getelementptr double, ptr %0, i64 28
  store <4 x double> %16, ptr %23, align 8
  ; Advance by 32 elements and leave the loop once the index equals %n.vec.
  %index.next = add i64 %index, 32
  %cm = icmp eq i64 %index.next, %n.vec
  br i1 %cm, label %return.block, label %vector.body

return.block:
  ret void
}