; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 | FileCheck -check-prefixes=CHECK,CHECK-PWR10 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 | FileCheck -check-prefixes=CHECK,CHECK-PWR10 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 | FileCheck -check-prefixes=CHECK,CHECK-PWR7 %s

; Every function below calls the MASSV entry point @__powf4 inside a simple
; 4-wide loop. The expectations are:
;   * in general the call is emitted as the CPU-specific symbol __powf4_P<N>,
;     where <N> follows -mcpu;
;   * when the exponent is a splat of 0.75 or 0.25 and the call carries the
;     fast-math flags used in @vspow_075 / @vspow_025, no MASSV call is
;     emitted and the output contains xvrsqrtesp instead.
;
; Each invocation enables two prefixes: the plain one (labels, negative
; checks, xvrsqrtesp, blr) that is common to all CPUs, and the per-CPU one
; that names the expected MASSV symbol.

; Exponent is a variable
define void @vspow_var(ptr nocapture %z, ptr nocapture readonly %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_var
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %z, i64 %index
  %next.gep31 = getelementptr float, ptr %y, i64 %index
  %next.gep32 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep32, align 4
  %wide.load33 = load <4 x float>, ptr %next.gep31, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> %wide.load33)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat constant that is neither 0.75 nor 0.25
define void @vspow_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_const
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant; no lane is 0.75 or 0.25
define void @vspow_neq_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq_const
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant: three lanes are 0.75 but the last one is
; not, so the MASSV call is still expected
define void @vspow_neq075_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq075_const
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant: two lanes are 0.25 and two are not, so the
; MASSV call is still expected
define void @vspow_neq025_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_neq025_const
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.75 and the call carries ninf afn: no MASSV call is
; expected, xvrsqrtesp is expected instead
define void @vspow_075(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_075
; CHECK-NOT: __powf4_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.25 and the call carries ninf afn nsz: no MASSV call
; is expected, xvrsqrtesp is expected instead
define void @vspow_025(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_025
; CHECK-NOT: __powf4_P{{(7|8|9|10)}}
; CHECK: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call ninf afn nsz <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.75 but the call has no fast-math flags: the MASSV
; call must be kept and xvrsqrtesp must not appear
define void @vspow_075_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_075_nofast
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.25 but the call has no fast-math flags: the MASSV
; call must be kept and xvrsqrtesp must not appear
define void @vspow_025_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL: @vspow_025_nofast
; CHECK-PWR10: __powf4_P10
; CHECK-PWR9: __powf4_P9
; CHECK-PWR8: __powf4_P8
; CHECK-PWR7: __powf4_P7
; CHECK-NOT: xvrsqrtesp
; CHECK: blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, ptr %y, i64 %index
  %next.gep19 = getelementptr float, ptr %x, i64 %index
  %wide.load = load <4 x float>, ptr %next.gep19, align 4
  %0 = call <4 x float> @__powf4(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  store <4 x float> %0, ptr %next.gep, align 4
  %index.next = add i64 %index, 4
  %1 = icmp eq i64 %index.next, 1024
  br i1 %1, label %for.end, label %vector.body

for.end:
  ret void
}

; Function Attrs: nounwind readnone speculatable willreturn
declare <4 x float> @__powf4(<4 x float>, <4 x float>)