; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

; Check propagation of optional IR flags (PR20802). For a flag to
; propagate from scalar instructions to their vector replacement,
; *all* scalar instructions must have the flag.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; All four scalar lshr instructions are 'exact', so the vector lshr keeps it.
define void @exact(ptr %x) {
; CHECK-LABEL: @exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr exact i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; %op2 lacks 'exact', so the flag must be dropped from the vector lshr.
define void @not_exact(ptr %x) {
; CHECK-LABEL: @not_exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; All four scalar adds are 'nsw', so the vector add keeps it.
define void @nsw(ptr %x) {
; CHECK-LABEL: @nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; %op4 lacks 'nsw', so the flag must be dropped from the vector add.
define void @not_nsw(ptr %x) {
; CHECK-LABEL: @not_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; All four scalar adds are 'nuw', so the vector add keeps it.
define void @nuw(ptr %x) {
; CHECK-LABEL: @nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; %op2 and %op3 lack 'nuw', so the flag must be dropped from the vector add.
define void @not_nuw(ptr %x) {
; CHECK-LABEL: @not_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; 'nuw' is common to all four adds but 'nsw' is not: keep nuw, drop nsw.
define void @not_nsw_but_nuw(ptr %x) {
; CHECK-LABEL: @not_nsw_but_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw nsw i32 %load2, 1
  %op3 = add nuw nsw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; 'nnan' is the only fast-math flag common to all four fadds: keep only nnan.
define void @nnan(ptr %x) {
; CHECK-LABEL: @nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

; No fast-math flag is common to all four fadds: drop them all.
define void @not_nnan(ptr %x) {
; CHECK-LABEL: @not_nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

; 'fast' implies all other flags; it is on all four fadds, so keep it.
define void @only_fast(ptr %x) {
; CHECK-LABEL: @only_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

; %op4 has only 'arcp' ('fast' implies arcp on the others): keep only arcp.
define void @only_arcp(ptr %x) {
; CHECK-LABEL: @only_arcp(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], splat (float 1.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}

; Alternating add/sub where every lane is 'nsw': both vector ops keep nsw.
define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; Alternating add/sub where one sub lane lacks 'nsw': only the add keeps it.
define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; Alternating add/sub where each opcode has a lane without 'nsw': drop it on both.
define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}

; Both fcmp/fsub pairs are 'fast': the vector fcmp and fsub keep the flag.
define void @fcmp_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> splat (double -0.000000e+00), [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub fast double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

; Same as @fcmp_fast but using unary fneg; both fnegs are 'fast'.
define void @fcmp_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fneg fast double %load1
  %sub2 = fneg fast double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

; Only one lane of each fcmp/fsub pair is 'fast': the flag is dropped.
define void @fcmp_no_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

; Same as @fcmp_no_fast but using unary fneg; neither fneg is 'fast'.
define void @fcmp_no_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fneg double %load1
  %sub2 = fneg double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}

declare double @llvm.fabs.f64(double) nounwind readnone

; Both scalar calls are 'fast': the vectorized intrinsic call keeps the flag.
define void @call_fast(ptr %x) {
; CHECK-LABEL: @call_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}

; %call2 lacks 'fast': the flag is dropped from the vectorized call.
define void @call_no_fast(ptr %x) {
; CHECK-LABEL: @call_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}

attributes #1 = { "target-features"="+avx" }