; Check that nvvm intrinsics get simplified to target-generic intrinsics where
; possible.
;
; We run this test twice; once with ftz on, and again with ftz off. Behold the
; hackery:

; RUN: cat %s > %t.ftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz
; RUN: opt < %t.ftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ

; RUN: cat %s > %t.noftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "ieee" }' >> %t.noftz
; RUN: opt < %t.noftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ

; We handle nvvm intrinsics with ftz variants as follows:
;  - If the module is in ftz mode, the ftz variant is transformed into the
;    regular llvm intrinsic, and the non-ftz variant is left alone.
;  - If the module is not in ftz mode, it's the reverse: Only the non-ftz
;    variant is transformed, and the ftz variant is left alone.

; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.
; Each family below has an f64 form, an f32 form, and an f32 ftz form.
; Expectations written with the common prefix apply to both runs of the test;
; expectations written with a mode-specific prefix apply only to the run with
; that f32 denormal mode.

; ceil: the f64 form is expected to become llvm.ceil.f64 in both runs; each f32
; form is expected to be simplified in only one of the two runs.

; CHECK-LABEL: @ceil_double
; CHECK: call double @llvm.ceil.f64
define double @ceil_double(double %a) #0 {
  %res = call double @llvm.nvvm.ceil.d(double %a)
  ret double %res
}

; CHECK-LABEL: @ceil_float
; NOFTZ: call float @llvm.ceil.f32
; FTZ: call float @llvm.nvvm.ceil.f
define float @ceil_float(float %a) #0 {
  %res = call float @llvm.nvvm.ceil.f(float %a)
  ret float %res
}

; CHECK-LABEL: @ceil_float_ftz
; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
; FTZ: call float @llvm.ceil.f32
define float @ceil_float_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.ceil.ftz.f(float %a)
  ret float %res
}

; fabs: only the f64 form is expected to be rewritten (to llvm.fabs.f64); both
; f32 forms are expected to remain nvvm calls in both runs.

; CHECK-LABEL: @fabs_double
; CHECK: call double @llvm.fabs.f64
define double @fabs_double(double %a) #0 {
  %res = call double @llvm.nvvm.fabs.d(double %a)
  ret double %res
}

; CHECK-LABEL: @fabs_float
; CHECK: call float @llvm.nvvm.fabs.f
define float @fabs_float(float %a) #0 {
  %res = call float @llvm.nvvm.fabs.f(float %a)
  ret float %res
}

; CHECK-LABEL: @fabs_float_ftz
; CHECK: call float @llvm.nvvm.fabs.ftz.f
define float @fabs_float_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.fabs.ftz.f(float %a)
  ret float %res
}

; floor: same expectation pattern as ceil.

; CHECK-LABEL: @floor_double
; CHECK: call double @llvm.floor.f64
define double @floor_double(double %a) #0 {
  %res = call double @llvm.nvvm.floor.d(double %a)
  ret double %res
}

; CHECK-LABEL: @floor_float
; NOFTZ: call float @llvm.floor.f32
; FTZ: call float @llvm.nvvm.floor.f
define float @floor_float(float %a) #0 {
  %res = call float @llvm.nvvm.floor.f(float %a)
  ret float %res
}

; CHECK-LABEL: @floor_float_ftz
; NOFTZ: call float @llvm.nvvm.floor.ftz.f
; FTZ: call float @llvm.floor.f32
define float @floor_float_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.floor.ftz.f(float %a)
  ret float %res
}

; fma (round-to-nearest forms): expected to map onto llvm.fma.

; CHECK-LABEL: @fma_double
; CHECK: call double @llvm.fma.f64
define double @fma_double(double %a, double %b, double %c) #0 {
  %res = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
  ret double %res
}

; CHECK-LABEL: @fma_float
; NOFTZ: call float @llvm.fma.f32
; FTZ: call float @llvm.nvvm.fma.rn.f
define float @fma_float(float %a, float %b, float %c) #0 {
  %res = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
  ret float %res
}

; CHECK-LABEL: @fma_float_ftz
; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
; FTZ: call float @llvm.fma.f32
define float @fma_float_ftz(float %a, float %b, float %c) #0 {
  %res = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
  ret float %res
}

; fmax: expected to map onto llvm.maxnum.

; CHECK-LABEL: @fmax_double
; CHECK: call double @llvm.maxnum.f64
define double @fmax_double(double %a, double %b) #0 {
  %res = call double @llvm.nvvm.fmax.d(double %a, double %b)
  ret double %res
}

; CHECK-LABEL: @fmax_float
; NOFTZ: call float @llvm.maxnum.f32
; FTZ: call float @llvm.nvvm.fmax.f
define float @fmax_float(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.fmax.f(float %a, float %b)
  ret float %res
}

; CHECK-LABEL: @fmax_float_ftz
; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
; FTZ: call float @llvm.maxnum.f32
define float @fmax_float_ftz(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
  ret float %res
}

; fmin: expected to map onto llvm.minnum.

; CHECK-LABEL: @fmin_double
; CHECK: call double @llvm.minnum.f64
define double @fmin_double(double %a, double %b) #0 {
  %res = call double @llvm.nvvm.fmin.d(double %a, double %b)
  ret double %res
}

; CHECK-LABEL: @fmin_float
; NOFTZ: call float @llvm.minnum.f32
; FTZ: call float @llvm.nvvm.fmin.f
define float @fmin_float(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.fmin.f(float %a, float %b)
  ret float %res
}

; CHECK-LABEL: @fmin_float_ftz
; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
; FTZ: call float @llvm.minnum.f32
define float @fmin_float_ftz(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
  ret float %res
}

; round: all three forms are expected to remain nvvm calls in both runs.

; CHECK-LABEL: @round_double
; CHECK: call double @llvm.nvvm.round.d
define double @round_double(double %a) #0 {
  %res = call double @llvm.nvvm.round.d(double %a)
  ret double %res
}

; CHECK-LABEL: @round_float
; CHECK: call float @llvm.nvvm.round.f
define float @round_float(float %a) #0 {
  %res = call float @llvm.nvvm.round.f(float %a)
  ret float %res
}

; CHECK-LABEL: @round_float_ftz
; CHECK: call float @llvm.nvvm.round.ftz.f
define float @round_float_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.round.ftz.f(float %a)
  ret float %res
}

; trunc: same expectation pattern as ceil and floor.

; CHECK-LABEL: @trunc_double
; CHECK: call double @llvm.trunc.f64
define double @trunc_double(double %a) #0 {
  %res = call double @llvm.nvvm.trunc.d(double %a)
  ret double %res
}

; CHECK-LABEL: @trunc_float
; NOFTZ: call float @llvm.trunc.f32
; FTZ: call float @llvm.nvvm.trunc.f
define float @trunc_float(float %a) #0 {
  %res = call float @llvm.nvvm.trunc.f(float %a)
  ret float %res
}

; CHECK-LABEL: @trunc_float_ftz
; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
; FTZ: call float @llvm.trunc.f32
define float @trunc_float_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.trunc.ftz.f(float %a)
  ret float %res
}

; Check NVVM intrinsics that correspond to LLVM cast operations.
; Floating point to signed integer, round-toward-zero forms: expected to become
; fptosi in both runs.

; CHECK-LABEL: @test_d2i
; CHECK: fptosi double %a to i32
define i32 @test_d2i(double %a) #0 {
  %res = call i32 @llvm.nvvm.d2i.rz(double %a)
  ret i32 %res
}

; CHECK-LABEL: @test_f2i
; CHECK: fptosi float %a to i32
define i32 @test_f2i(float %a) #0 {
  %res = call i32 @llvm.nvvm.f2i.rz(float %a)
  ret i32 %res
}

; CHECK-LABEL: @test_d2ll
; CHECK: fptosi double %a to i64
define i64 @test_d2ll(double %a) #0 {
  %res = call i64 @llvm.nvvm.d2ll.rz(double %a)
  ret i64 %res
}

; CHECK-LABEL: @test_f2ll
; CHECK: fptosi float %a to i64
define i64 @test_f2ll(float %a) #0 {
  %res = call i64 @llvm.nvvm.f2ll.rz(float %a)
  ret i64 %res
}

; Floating point to unsigned integer, round-toward-zero forms: expected to
; become fptoui in both runs.

; CHECK-LABEL: @test_d2ui
; CHECK: fptoui double %a to i32
define i32 @test_d2ui(double %a) #0 {
  %res = call i32 @llvm.nvvm.d2ui.rz(double %a)
  ret i32 %res
}

; CHECK-LABEL: @test_f2ui
; CHECK: fptoui float %a to i32
define i32 @test_f2ui(float %a) #0 {
  %res = call i32 @llvm.nvvm.f2ui.rz(float %a)
  ret i32 %res
}

; CHECK-LABEL: @test_d2ull
; CHECK: fptoui double %a to i64
define i64 @test_d2ull(double %a) #0 {
  %res = call i64 @llvm.nvvm.d2ull.rz(double %a)
  ret i64 %res
}

; CHECK-LABEL: @test_f2ull
; CHECK: fptoui float %a to i64
define i64 @test_f2ull(float %a) #0 {
  %res = call i64 @llvm.nvvm.f2ull.rz(float %a)
  ret i64 %res
}

; Signed integer to floating point, round-to-nearest forms: expected to become
; sitofp in both runs.

; CHECK-LABEL: @test_i2d
; CHECK: sitofp i32 %a to double
define double @test_i2d(i32 %a) #0 {
  %res = call double @llvm.nvvm.i2d.rn(i32 %a)
  ret double %res
}

; CHECK-LABEL: @test_i2f
; CHECK: sitofp i32 %a to float
define float @test_i2f(i32 %a) #0 {
  %res = call float @llvm.nvvm.i2f.rn(i32 %a)
  ret float %res
}

; CHECK-LABEL: @test_ll2d
; CHECK: sitofp i64 %a to double
define double @test_ll2d(i64 %a) #0 {
  %res = call double @llvm.nvvm.ll2d.rn(i64 %a)
  ret double %res
}

; CHECK-LABEL: @test_ll2f
; CHECK: sitofp i64 %a to float
define float @test_ll2f(i64 %a) #0 {
  %res = call float @llvm.nvvm.ll2f.rn(i64 %a)
  ret float %res
}

; Unsigned integer to floating point, round-to-nearest forms: expected to
; become uitofp in both runs.

; CHECK-LABEL: @test_ui2d
; CHECK: uitofp i32 %a to double
define double @test_ui2d(i32 %a) #0 {
  %res = call double @llvm.nvvm.ui2d.rn(i32 %a)
  ret double %res
}

; CHECK-LABEL: @test_ui2f
; CHECK: uitofp i32 %a to float
define float @test_ui2f(i32 %a) #0 {
  %res = call float @llvm.nvvm.ui2f.rn(i32 %a)
  ret float %res
}

; CHECK-LABEL: @test_ull2d
; CHECK: uitofp i64 %a to double
define double @test_ull2d(i64 %a) #0 {
  %res = call double @llvm.nvvm.ull2d.rn(i64 %a)
  ret double %res
}

; CHECK-LABEL: @test_ull2f
; CHECK: uitofp i64 %a to float
define float @test_ull2f(i64 %a) #0 {
  %res = call float @llvm.nvvm.ull2f.rn(i64 %a)
  ret float %res
}

; Check NVVM intrinsics that map to LLVM binary operations.

; add.rn: every form is expected to remain an nvvm call in both runs.

; CHECK-LABEL: @test_add_rn_d
; CHECK: call double @llvm.nvvm.add.rn.d
define double @test_add_rn_d(double %a, double %b) #0 {
  %res = call double @llvm.nvvm.add.rn.d(double %a, double %b)
  ret double %res
}

; CHECK-LABEL: @test_add_rn_f
; CHECK: call float @llvm.nvvm.add.rn.f
define float @test_add_rn_f(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.add.rn.f(float %a, float %b)
  ret float %res
}

; CHECK-LABEL: @test_add_rn_f_ftz
; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
  ret float %res
}

; mul.rn: every form is expected to remain an nvvm call in both runs.

; CHECK-LABEL: @test_mul_rn_d
; CHECK: call double @llvm.nvvm.mul.rn.d
define double @test_mul_rn_d(double %a, double %b) #0 {
  %res = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
  ret double %res
}

; CHECK-LABEL: @test_mul_rn_f
; CHECK: call float @llvm.nvvm.mul.rn.f
define float @test_mul_rn_f(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
  ret float %res
}

; CHECK-LABEL: @test_mul_rn_f_ftz
; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
  ret float %res
}

; div.rn: only the f64 form is expected to become an fdiv; both f32 forms are
; expected to remain nvvm calls in both runs.

; CHECK-LABEL: @test_div_rn_d
; CHECK: fdiv
define double @test_div_rn_d(double %a, double %b) #0 {
  %res = call double @llvm.nvvm.div.rn.d(double %a, double %b)
  ret double %res
}

; CHECK-LABEL: @test_div_rn_f
; CHECK: call float @llvm.nvvm.div.rn.f
define float @test_div_rn_f(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.div.rn.f(float %a, float %b)
  ret float %res
}

; CHECK-LABEL: @test_div_rn_f_ftz
; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
  %res = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
  ret float %res
}

; Check NVVM intrinsics that require us to emit custom IR.
; rcp.rn (f32): both forms are expected to remain nvvm calls in both runs.

; CHECK-LABEL: @test_rcp_rn_f
; CHECK: call float @llvm.nvvm.rcp.rn.f
define float @test_rcp_rn_f(float %a) #0 {
  %res = call float @llvm.nvvm.rcp.rn.f(float %a)
  ret float %res
}

; CHECK-LABEL: @test_rcp_rn_f_ftz
; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
define float @test_rcp_rn_f_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
  ret float %res
}

; sqrt: the f64 round-to-nearest form is expected to become llvm.sqrt.f64.

; CHECK-LABEL: @test_sqrt_rn_d
; CHECK: call double @llvm.sqrt.f64(double %a)
define double @test_sqrt_rn_d(double %a) #0 {
  %res = call double @llvm.nvvm.sqrt.rn.d(double %a)
  ret double %res
}

; nvvm.sqrt.f is a special case: It goes to a llvm.sqrt.f
; CHECK-LABEL: @test_sqrt_f
; CHECK: call float @llvm.sqrt.f32(float %a)
define float @test_sqrt_f(float %a) #0 {
  %res = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %res
}

; The f32 round-to-nearest sqrt forms are expected to remain nvvm calls in both
; runs.

; CHECK-LABEL: @test_sqrt_rn_f
; CHECK: call float @llvm.nvvm.sqrt.rn.f
define float @test_sqrt_rn_f(float %a) #0 {
  %res = call float @llvm.nvvm.sqrt.rn.f(float %a)
  ret float %res
}

; CHECK-LABEL: @test_sqrt_rn_f_ftz
; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
define float @test_sqrt_rn_f_ftz(float %a) #0 {
  %res = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
  ret float %res
}

; Clamped funnel shifts. These functions do not carry attribute group #0.
; Three cases per direction: a small constant shift amount, a constant shift
; amount of 300, and a non-constant shift amount.

; Constant amount 3: expected to become llvm.fshl with the same amount.
; CHECK-LABEL: @test_fshl_clamp_1
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
define i32 @test_fshl_clamp_1(i32 %a, i32 %b) {
  %val = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
  ret i32 %val
}

; Constant amount 300: expected to fold to the second operand.
; CHECK-LABEL: @test_fshl_clamp_2
; CHECK: ret i32 %b
define i32 @test_fshl_clamp_2(i32 %a, i32 %b) {
  %val = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
  ret i32 %val
}

; Variable amount: expected to be left as the nvvm call.
; CHECK-LABEL: @test_fshl_clamp_3
; CHECK: call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
define i32 @test_fshl_clamp_3(i32 %a, i32 %b, i32 %c) {
  %val = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
  ret i32 %val
}

; Constant amount 3: expected to appear as llvm.fshl with amount 29.
; CHECK-LABEL: @test_fshr_clamp_1
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
define i32 @test_fshr_clamp_1(i32 %a, i32 %b) {
  %val = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
  ret i32 %val
}

; Constant amount 300: expected to fold to the first operand.
; CHECK-LABEL: @test_fshr_clamp_2
; CHECK: ret i32 %a
define i32 @test_fshr_clamp_2(i32 %a, i32 %b) {
  %val = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 300)
  ret i32 %val
}

; Variable amount: expected to be left as the nvvm call.
; CHECK-LABEL: @test_fshr_clamp_3
; CHECK: call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
define i32 @test_fshr_clamp_3(i32 %a, i32 %b, i32 %c) {
  %val = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
  ret i32 %val
}

; Declarations of the NVVM intrinsics. Some entries in this list are declared
; but not called by any function in this file.
declare double @llvm.nvvm.add.rn.d(double, double)
declare float @llvm.nvvm.add.rn.f(float, float)
declare float @llvm.nvvm.add.rn.ftz.f(float, float)
declare double @llvm.nvvm.ceil.d(double)
declare float @llvm.nvvm.ceil.f(float)
declare float @llvm.nvvm.ceil.ftz.f(float)
declare float @llvm.nvvm.d2f.rm(double)
declare float @llvm.nvvm.d2f.rm.ftz(double)
declare float @llvm.nvvm.d2f.rp(double)
declare float @llvm.nvvm.d2f.rp.ftz(double)
declare float @llvm.nvvm.d2f.rz(double)
declare float @llvm.nvvm.d2f.rz.ftz(double)
declare i32 @llvm.nvvm.d2i.rz(double)
declare i64 @llvm.nvvm.d2ll.rz(double)
declare i32 @llvm.nvvm.d2ui.rz(double)
declare i64 @llvm.nvvm.d2ull.rz(double)
declare double @llvm.nvvm.div.rn.d(double, double)
declare float @llvm.nvvm.div.rn.f(float, float)
declare float @llvm.nvvm.div.rn.ftz.f(float, float)
declare i16 @llvm.nvvm.f2h.rz(float)
declare i16 @llvm.nvvm.f2h.rz.ftz(float)
declare i32 @llvm.nvvm.f2i.rz(float)
declare i32 @llvm.nvvm.f2i.rz.ftz(float)
declare i64 @llvm.nvvm.f2ll.rz(float)
declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
declare i32 @llvm.nvvm.f2ui.rz(float)
declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
declare i64 @llvm.nvvm.f2ull.rz(float)
declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
declare double @llvm.nvvm.fabs.d(double)
declare float @llvm.nvvm.fabs.f(float)
declare float @llvm.nvvm.fabs.ftz.f(float)
declare double @llvm.nvvm.floor.d(double)
declare float @llvm.nvvm.floor.f(float)
declare float @llvm.nvvm.floor.ftz.f(float)
declare double @llvm.nvvm.fma.rn.d(double, double, double)
declare float @llvm.nvvm.fma.rn.f(float, float, float)
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
declare double @llvm.nvvm.fmax.d(double, double)
declare float @llvm.nvvm.fmax.f(float, float)
declare float @llvm.nvvm.fmax.ftz.f(float, float)
declare double @llvm.nvvm.fmin.d(double, double)
declare float @llvm.nvvm.fmin.f(float, float)
declare float @llvm.nvvm.fmin.ftz.f(float, float)
declare double @llvm.nvvm.i2d.rn(i32)
declare float @llvm.nvvm.i2f.rn(i32)
declare double @llvm.nvvm.ll2d.rn(i64)
declare float @llvm.nvvm.ll2f.rn(i64)
declare double @llvm.nvvm.lohi.i2d(i32, i32)
declare double @llvm.nvvm.mul.rn.d(double, double)
declare float @llvm.nvvm.mul.rn.f(float, float)
declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
declare double @llvm.nvvm.rcp.rm.d(double)
declare double @llvm.nvvm.rcp.rn.d(double)
declare float @llvm.nvvm.rcp.rn.f(float)
declare float @llvm.nvvm.rcp.rn.ftz.f(float)
declare double @llvm.nvvm.round.d(double)
declare float @llvm.nvvm.round.f(float)
declare float @llvm.nvvm.round.ftz.f(float)
declare float @llvm.nvvm.sqrt.f(float)
declare double @llvm.nvvm.sqrt.rn.d(double)
declare float @llvm.nvvm.sqrt.rn.f(float)
declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
declare double @llvm.nvvm.trunc.d(double)
declare float @llvm.nvvm.trunc.f(float)
declare float @llvm.nvvm.trunc.ftz.f(float)
declare double @llvm.nvvm.ui2d.rn(i32)
declare float @llvm.nvvm.ui2f.rn(i32)
declare double @llvm.nvvm.ull2d.rn(i64)
declare float @llvm.nvvm.ull2f.rn(i64)
declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)