; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s

; Test to verify that when math headers are built with
; __FINITE_MATH_ONLY__ enabled, causing use of __<func>_finite
; function versions, vectorization can map these to vector versions.
;
; Every function below has the same shape: a 1000-iteration loop converts the
; induction variable to floating point, calls one scalar __<func>_finite
; routine and stores the result to %varray. The !llvm.loop metadata on the
; back-edge selects the vectorization width the test expects to see.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; exp, float: scalar callee @__expf_finite, loop hint !4 (width 4).
define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK: <4 x float> @amd_vrs4_expf(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__expf_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; exp, double: scalar callee @__exp_finite, loop hint !4 (width 4).
; The trailing "(" in the pattern keeps it from also matching the exp2/exp10
; vector routines, whose names share this prefix.
define void @exp_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK: <4 x double> @amd_vrd4_exp(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call fast double @__exp_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
  store double %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; log, float: scalar callee @__logf_finite, loop hint !4 (width 4).
define void @log_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK: <4 x float> @amd_vrs4_logf(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__logf_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; log, double: scalar callee @__log_finite, loop hint !4 (width 4).
; The trailing "(" in the pattern keeps it from also matching the log2/log10
; vector routines, whose names share this prefix.
define void @log_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK: <4 x double> @amd_vrd4_log(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call fast double @__log_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
  store double %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; pow, float: the base is the converted induction variable, the exponent is
; loaded from %exp. Scalar callee @__powf_finite, loop hint !4 (width 4).
define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK: <4 x float> @amd_vrs4_powf(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %tmp2, ptr %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; pow, double: the base is the converted induction variable, the exponent is
; loaded from %exp. Scalar callee @__pow_finite, loop hint !4 (width 4).
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: <4 x double> @amd_vrd4_pow(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %indvars.iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
  store double %tmp2, ptr %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; exp2, float: scalar callee @__exp2f_finite, called without fast-math flags.
; Loop hint !4 (width 4).
define void @exp2f_finite(ptr nocapture %varray) {
; CHECK-LABEL: @exp2f_finite(
; CHECK: call <4 x float> @amd_vrs4_exp2f(<4 x float> %{{.*}})
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @__exp2f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:
  ret void
}

; exp2, double: scalar callee @__exp2_finite, called without fast-math flags.
; Loop hint !4 (width 4).
define void @exp2_finite(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_finite(
; CHECK: call <4 x double> @amd_vrd4_exp2(<4 x double> {{.*}})
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @__exp2_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:
  ret void
}

; log2, float: scalar callee @__log2f_finite, loop hint !4 (width 4).
define void @log2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: <4 x float> @amd_vrs4_log2f(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__log2f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; log2, double: scalar callee @__log2_finite, loop hint !4 (width 4).
define void @log2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: <4 x double> @amd_vrd4_log2(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call fast double @__log2_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv
  store double %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; log10, float: scalar callee @__log10f_finite, loop hint !4 (width 4).
define void @log10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK: <4 x float> @amd_vrs4_log10f(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__log10f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; log10, double: scalar callee @__log10_finite, called without fast-math
; flags. Uses loop hint !1, so the expected vector width here is 2.
define void @log10_finite(ptr nocapture %varray) {
; CHECK-LABEL: @log10_finite(
; CHECK: call <2 x double> @amd_vrd2_log10(<2 x double> {{.*}})
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @__log10_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret void
}

; exp10, double: scalar callee @__exp10_finite, called without fast-math
; flags. Uses loop hint !1, so the expected vector width here is 2.
define void @exp10_finite(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_finite(
; CHECK: call <2 x double> @amd_vrd2_exp10(<2 x double> {{.*}})
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @__exp10_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret void
}

; exp10, float: scalar callee @__exp10f_finite, loop hint !4 (width 4).
define void @exp10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f32(
; CHECK: <4 x float> @amd_vrs4_exp10f(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__exp10f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; asin, double: scalar callee @__asin_finite, called without fast-math flags.
; Uses loop hint !7, so the expected vector width here is 8.
define void @asin_finite(ptr nocapture %varray) {
; CHECK-LABEL: @asin_finite(
; CHECK: call <8 x double> @amd_vrd8_asin(<8 x double> {{.*}})
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @__asin_finite(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7

for.end:
  ret void
}
; asin, float: scalar callee @__asinf_finite, loop hint !4 (width 4).
define void @asinf_finite(ptr nocapture %varray) {
; CHECK-LABEL: @asinf_finite(
; CHECK: <4 x float> @amd_vrs4_asinf(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__asinf_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; acos, float: scalar callee @__acosf_finite, loop hint !4 (width 4).
define void @acosf_finite(ptr nocapture %varray) {
; CHECK-LABEL: @acosf_finite(
; CHECK: <4 x float> @amd_vrs4_acosf(
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__acosf_finite(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
  store float %call, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:                                          ; preds = %for.body
  ret void
}

; Loop hint used by the width-2 tests: vectorization enabled, width 2.
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}

; Loop hint used by the width-4 tests: vectorization enabled, width 4.
!4 = distinct !{!4, !5, !6}
!5 = !{!"llvm.loop.vectorize.width", i32 4}
!6 = !{!"llvm.loop.vectorize.enable", i1 true}

; Loop hint used by the width-8 test: vectorization enabled, width 8.
!7 = distinct !{!7, !8, !9}
!8 = !{!"llvm.loop.vectorize.width", i32 8}
!9 = !{!"llvm.loop.vectorize.enable", i1 true}

; Scalar __<func>_finite entry points called by the loops in this file.
; NOTE(review): these reference attribute group #0, whose definition is not in
; the part of the file visible here - confirm it is defined further down.
declare float @__expf_finite(float) #0
declare double @__exp_finite(double) #0
declare double @__log_finite(double) #0
declare float @__logf_finite(float) #0
declare float @__powf_finite(float, float) #0
declare double @__pow_finite(double, double) #0
declare float @__exp2f_finite(float) #0
declare double @__exp2_finite(double) #0
declare float @__log2f_finite(float) #0
declare double @__log2_finite(double) #0
declare float @__log10f_finite(float) #0
declare double @__log10_finite(double) #0
declare double @__exp10_finite(double) #0
declare float @__exp10f_finite(float) #0
declare double @__asin_finite(double) #0
declare float @__asinf_finite(float) #0
declare float @__acosf_finite(float) #0