; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL

; Exercise FP transforms that are driven by instruction/node-level fast-math-flags.
; The -debug-only=isel runs inspect the DAG dump to verify that the flags are
; propagated to newly created nodes.
; The runs with the global unsafe-math options cover the pre-FMF behavior, where
; the individual instructions/nodes carry no flags of their own.

declare float @llvm.fma.f32(float, float, float)
declare float @llvm.sqrt.f32(float)

; X * Y + Z --> fma(X, Y, Z)

; The contract bit on the fmul is checked: only the fadd carries 'contract'
; here, so no FMA is formed without the global options.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
; FMFDEBUG-NOT:     fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'

define float @fmul_fadd_contract1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmulsp 0, 1, 2
; FMF-NEXT:    xsaddsp 1, 0, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; The contract bit on the fadd is checked as well.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
; FMFDEBUG-NOT:     fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'

define float @fmul_fadd_contract2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmulsp 0, 1, 2
; FMF-NEXT:    xsaddsp 1, 0, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul contract float %x, %y
  %sum = fadd float %prod, %z
  ret float %sum
}

; With 'contract' on both the fmul and the fadd, the pair is fused into an FMA.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'

define float @fmul_fadd_contract3(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract3:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract3:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul contract float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; Reassociation does NOT imply that FMA contraction is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
; FMFDEBUG-NOT:     fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'

define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmulsp 0, 1, 2
; FMF-NEXT:    xsaddsp 1, 0, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd reassoc float %prod, %z
  ret float %sum
}

; Flagging the intermediate fmul with 'reassoc' too should not change anything.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
; FMFDEBUG-NOT:     fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'

define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmulsp 0, 1, 2
; FMF-NEXT:    xsaddsp 1, 0, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul reassoc float %x, %y
  %sum = fadd reassoc float %prod, %z
  ret float %sum
}

; The fadd is now fully 'fast', but the fmul is not yet.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
; FMFDEBUG-NOT:     fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'

define float @fmul_fadd_fast1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmulsp 0, 1, 2
; FMF-NEXT:    xsaddsp 1, 0, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd fast float %prod, %z
  ret float %sum
}

; With the intermediate fmul also flagged 'fast', contraction is allowed.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'

define float @fmul_fadd_fast2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul fast float %x, %y
  %sum = fadd fast float %prod, %z
  ret float %sum
}

; fma(X, 7.0, X * 42.0) --> X * 49.0
; This is the minimum FMF needed for this transform - the FMA allows reassociation.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

define float @fmul_fma_reassoc1(float %x) {
; FMF-LABEL: fmul_fma_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul with 'reassoc' too should not change anything.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

define float @fmul_fma_reassoc2(float %x) {
; FMF-LABEL: fmul_fma_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul reassoc float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; The FMA is now fully 'fast'. This implies that reassociation is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

define float @fmul_fma_fast1(float %x) {
; FMF-LABEL: fmul_fma_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call fast float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul with 'fast' too should not change anything.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

define float @fmul_fma_fast2(float %x) {
; FMF-LABEL: fmul_fma_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; GLOBAL-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul fast float %x, 42.0
  %fused = call fast float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'

define float @sqrt_afn_ieee(float %x) #0 {
; FMF-LABEL: sqrt_afn_ieee:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
; FMF-NEXT:    xsabsdp 0, 1
; FMF-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
; FMF-NEXT:    fcmpu 0, 0, 2
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    blt 0, .LBB11_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 2, 1
; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
; FMF-NEXT:    vspltisw 2, -3
; FMF-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 2
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xsmulsp 1, 1, 2
; FMF-NEXT:    xvcvsxwdp 2, 34
; FMF-NEXT:    xsaddsp 1, 1, 2
; FMF-NEXT:    xsmulsp 0, 0, 1
; FMF-NEXT:  .LBB11_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_ieee:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
; GLOBAL-NEXT:    xsabsdp 0, 1
; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
; GLOBAL-NEXT:    fcmpu 0, 0, 2
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    blt 0, .LBB11_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    vspltisw 2, -3
; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
; GLOBAL-NEXT:    xvcvsxwdp 2, 34
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 0, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB11_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; Same as above but with 'afn' only (no 'ninf'): a plain xssqrtsp is expected.

define float @sqrt_afn_ieee_inf(float %x) #0 {
; FMF-LABEL: sqrt_afn_ieee_inf:
; FMF:       # %bb.0:
; FMF-NEXT:    xssqrtsp 1, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_ieee_inf:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssqrtsp 1, 1
; GLOBAL-NEXT:    blr
  %root = call afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; The 'afn ninf' sqrt again, this time in a function using attribute group #1.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'

define float @sqrt_afn_preserve_sign(float %x) #1 {
; FMF-LABEL: sqrt_afn_preserve_sign:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB13_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
; FMF-NEXT:    vspltisw 2, -3
; FMF-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmulsp 2, 1, 2
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xvcvsxwdp 1, 34
; FMF-NEXT:    xsaddsp 0, 0, 1
; FMF-NEXT:    xsmulsp 0, 2, 0
; FMF-NEXT:  .LBB13_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB13_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    vspltisw 2, -3
; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
; GLOBAL-NEXT:    xvcvsxwdp 2, 34
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 0, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB13_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; 'afn' only (no 'ninf') in a function using attribute group #1: a plain
; xssqrtsp is expected.

define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
; FMF-LABEL: sqrt_afn_preserve_sign_inf:
; FMF:       # %bb.0:
; FMF-NEXT:    xssqrtsp 1, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssqrtsp 1, 1
; GLOBAL-NEXT:    blr
  %root = call afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; The call now carries 'contract reassoc afn ninf' (not the full 'fast' set).
; The estimate sequence is still used, and the flags show up on the new fmul.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'

define float @sqrt_fast_ieee(float %x) #0 {
; FMF-LABEL: sqrt_fast_ieee:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
; FMF-NEXT:    xsabsdp 0, 1
; FMF-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
; FMF-NEXT:    fcmpu 0, 0, 2
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    blt 0, .LBB15_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    vspltisw 2, -3
; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
; FMF-NEXT:    xvcvsxwdp 2, 34
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmaddasp 2, 1, 0
; FMF-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xsmulsp 0, 0, 2
; FMF-NEXT:  .LBB15_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
; GLOBAL-NEXT:    xsabsdp 0, 1
; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
; GLOBAL-NEXT:    fcmpu 0, 0, 2
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    blt 0, .LBB15_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    vspltisw 2, -3
; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
; GLOBAL-NEXT:    xvcvsxwdp 2, 34
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 0, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB15_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
  ret float %root
}

; The same 'contract reassoc ninf afn' call in a function using attribute group #1.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'

define float @sqrt_fast_preserve_sign(float %x) #1 {
; FMF-LABEL: sqrt_fast_preserve_sign:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB16_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 0, 1
; FMF-NEXT:    vspltisw 2, -3
; FMF-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
; FMF-NEXT:    xvcvsxwdp 2, 34
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    xsmaddasp 2, 1, 0
; FMF-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
; FMF-NEXT:    xsmulsp 0, 1, 0
; FMF-NEXT:    xsmulsp 0, 0, 2
; FMF-NEXT:  .LBB16_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB16_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 0, 1
; GLOBAL-NEXT:    vspltisw 2, -3
; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
; GLOBAL-NEXT:    xvcvsxwdp 2, 34
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
; GLOBAL-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
; GLOBAL-NEXT:    xsmulsp 0, 1, 0
; GLOBAL-NEXT:    xsmulsp 0, 0, 2
; GLOBAL-NEXT:  .LBB16_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; fcmp can have fast-math-flags.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

define double @fcmp_nnan(double %a, double %y, double %z) {
; FMF-LABEL: fcmp_nnan:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    xscmpudp 0, 1, 0
; FMF-NEXT:    blt 0, .LBB17_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    fmr 3, 2
; FMF-NEXT:  .LBB17_2:
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fcmp_nnan:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    xscmpudp 0, 1, 0
; GLOBAL-NEXT:    blt 0, .LBB17_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    fmr 3, 2
; GLOBAL-NEXT:  .LBB17_2:
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %ult = fcmp nnan ult double %a, 0.0
  %sel = select i1 %ult, double %z, double %y
  ret double %sel
}

; FP library calls can have fast-math-flags.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
; FMFDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
; FMFDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
; GLOBALDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
; GLOBALDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

declare double @log2(double)
define double @log2_approx(double %x) nounwind {
; FMF-LABEL: log2_approx:
; FMF:       # %bb.0:
; FMF-NEXT:    mflr 0
; FMF-NEXT:    stdu 1, -32(1)
; FMF-NEXT:    std 0, 48(1)
; FMF-NEXT:    bl log2
; FMF-NEXT:    nop
; FMF-NEXT:    addi 1, 1, 32
; FMF-NEXT:    ld 0, 16(1)
; FMF-NEXT:    mtlr 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: log2_approx:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    mflr 0
; GLOBAL-NEXT:    stdu 1, -32(1)
; GLOBAL-NEXT:    std 0, 48(1)
; GLOBAL-NEXT:    bl log2
; GLOBAL-NEXT:    nop
; GLOBAL-NEXT:    addi 1, 1, 32
; GLOBAL-NEXT:    ld 0, 16(1)
; GLOBAL-NEXT:    mtlr 0
; GLOBAL-NEXT:    blr
  %res = call afn double @log2(double %x)
  ret double %res
}

; -(X - Y) --> (Y - X)

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
; FMF-LABEL: fneg_fsub_nozeros_1:
; FMF:       # %bb.0:
; FMF-NEXT:    xssubsp 1, 2, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fneg_fsub_nozeros_1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssubsp 1, 2, 1
; GLOBAL-NEXT:    blr
  ; X - Y, then its negation written as (0.0 - diff) with 'nsz' on the outer fsub.
  %diff = fsub float %x, %y
  %negated = fsub nsz float 0.0, %diff
  ret float %negated
}

attributes #0 = { "denormal-fp-math"="ieee,ieee" }
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FMFDEBUG: {{.*}}
; GLOBALDEBUG: {{.*}}