# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}

# We only need to run this second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
#
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}

# Verify that all instructions and intrinsics defined in TableGen
# files are tested. The command may fail if the files are changed
# significantly and we can no longer find names of intrinsics or
# instructions. In that case we can replace this command with a
# reference list.
#
# Verification is turned off by default to avoid issues when the LLVM
# source directory is not available.
#
# RUN-DISABLED: %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td

from __future__ import print_function

import argparse
import re
import string
import textwrap
from itertools import product


def get_llvm_geom(geom_ptx):
    """Return the LLVM intrinsic geometry name for a PTX geometry suffix.

    Raises KeyError for an unknown geometry.
    """
    geom = {
        "1d": "1d",
        "2d": "2d",
        "3d": "3d",
        "a1d": "1d.array",
        "a2d": "2d.array",
        "cube": "cube",
        "acube": "cube.array",
    }
    return geom[geom_ptx]


def get_ptx_reg(ty):
    """Return the FileCheck pattern matching a PTX register of type `ty`.

    Raises KeyError for a type that has no register pattern.
    """
    reg = {
        "b8": "%rs{{[0-9]+}}",
        "b16": "%rs{{[0-9]+}}",
        "b32": "%r{{[0-9]+}}",
        "b64": "%rd{{[0-9]+}}",
        "f32": "%f{{[0-9]+}}",
        "u32": "%r{{[0-9]+}}",
        "s32": "%r{{[0-9]+}}",
    }
    return reg[ty]


def get_ptx_vec_reg(vec, ty):
    """Return the FileCheck pattern for a braced PTX register tuple.

    `vec` is "", "v2" or "v4"; `ty` is a PTX type accepted by get_ptx_reg.
    """
    vec_reg = {
        "": "{{{reg}}}",
        "v2": "{{{reg}, {reg}}}",
        "v4": "{{{reg}, {reg}, {reg}, {reg}}}",
    }
    return vec_reg[vec].format(reg=get_ptx_reg(ty))


def get_llvm_type(ty):
    """Translate a PTX scalar type name into the LLVM IR type name.

    Raises RuntimeError for an unsupported (or empty) type name.
    """
    # Slice instead of indexing so that an empty string reaches the
    # RuntimeError below rather than failing with IndexError.
    if ty[:1] in ("b", "s", "u"):
        return "i" + ty[1:]
    if ty == "f16":
        return "half"
    if ty == "f32":
        return "float"
    raise RuntimeError("invalid type: " + ty)


def _get_llvm_storage_type(ty_ptx):
    """Return the LLVM type used to pass a value of PTX type `ty_ptx`."""
    ty = get_llvm_type(ty_ptx)
    # i8 is passed as i16, same as in PTX
    return "i16" if ty == "i8" else ty


def get_llvm_vec_type(vec, ty_ptx):
    """Return the LLVM return type (scalar or literal struct) for `vec`/`ty_ptx`."""
    vec_ty = {
        "": "{ty}",
        "v2": "{{ {ty}, {ty} }}",
        "v4": "{{ {ty}, {ty}, {ty}, {ty} }}",
    }
    return vec_ty[vec].format(ty=_get_llvm_storage_type(ty_ptx))


def get_llvm_value(vec, ty_ptx):
    """Return the typed LLVM argument list (%v1..%v4) for a stored value."""
    value = {
        "": "{ty} %v1",
        "v2": "{ty} %v1, {ty} %v2",
        "v4": "{ty} %v1, {ty} %v2, {ty} %v3, {ty} %v4",
    }
    return value[vec].format(ty=_get_llvm_storage_type(ty_ptx))


def get_llvm_value_type(vec, ty_ptx):
    """Return the comma-separated LLVM types of a stored value."""
    value = {"": "{ty}", "v2": "{ty}, {ty}", "v4": "{ty}, {ty}, {ty}, {ty}"}
    return value[vec].format(ty=_get_llvm_storage_type(ty_ptx))


id_counter = 0


def get_table_gen_id():
    """Return the next value of the module-wide counter (starting at 1)."""
    global id_counter
    id_counter += 1
    return id_counter


def gen_triple(target):
    """Print the target triple line for `target` ("cuda" or "nvcl").

    Raises RuntimeError for any other target.
    """
    if target not in ("cuda", "nvcl"):
        raise RuntimeError("invalid target: " + target)
    print('target triple = "nvptx64-unknown-{}"\n'.format(target))


def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the global surface/texture/sampler definitions and their CHECKs.

    Returns the list of nvvm.annotations metadata strings that describe
    the printed globals. The sampler global is only emitted in
    non-unified mode.
    """
    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print("@{} = internal addrspace(1) global i64 0, align 8".format(surf_name))
    print("@{} = internal addrspace(1) global i64 1, align 8".format(tex_name))
    generated_metadata = [
        '!{{i64 addrspace(1)* @{}, !"surface", i32 1}}'.format(surf_name),
        '!{{i64 addrspace(1)* @{}, !"texture", i32 1}}'.format(tex_name),
    ]

    if not is_unified(target):
        print("; CHECK: .global .samplerref {}".format(sampler_name))
        print("@{} = internal addrspace(1) global i64 1, align 8".format(sampler_name))
        generated_metadata.append(
            '!{{i64 addrspace(1)* @{}, !"sampler", i32 1}}'.format(sampler_name)
        )

    return generated_metadata


def gen_metadata(metadata):
    """Print the !nvvm.annotations node followed by each numbered entry."""
    md_values = ["!{}".format(i) for i in range(len(metadata))]
    print("!nvvm.annotations = !{{{values}}}".format(values=(", ".join(md_values))))
    for i, md in enumerate(metadata):
        print("!{} = {}".format(i, md))


def get_llvm_surface_access(geom_ptx):
    """Return the typed LLVM coordinate arguments for a surface geometry."""
    access = {
        "1d": "i32 %x",
        "2d": "i32 %x, i32 %y",
        "3d": "i32 %x, i32 %y, i32 %z",
        "a1d": "i32 %l, i32 %x",
        "a2d": "i32 %l, i32 %x, i32 %y",
    }
    return access[geom_ptx]


def get_llvm_surface_access_type(geom_ptx):
    """Return the LLVM coordinate argument types for a surface geometry."""
    access_ty = {
        "1d": "i32",
        "2d": "i32, i32",
        "3d": "i32, i32, i32",
        "a1d": "i32, i32",
        "a2d": "i32, i32, i32",
    }
    return access_ty[geom_ptx]


def get_ptx_surface_access(geom_ptx):
    """
    Operand b is a scalar or singleton tuple for 1d surfaces; is a
    two-element vector for 2d surfaces; and is a four-element vector
    for 3d surfaces, where the fourth element is ignored. Coordinate
    elements are of type .s32.

    For 1d surface arrays, operand b has type .v2.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the second element is interpreted as a 1d
    surface coordinate of type .s32.

    For 2d surface arrays, operand b has type .v4.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the next two elements are interpreted as 2d
    surface coordinates of type .s32. The fourth element is ignored.
    """
    # Number of 32-bit registers in the coordinate tuple per geometry.
    reg_count = {"1d": 1, "2d": 2, "3d": 4, "a1d": 2, "a2d": 4}
    # Use the same multi-digit register pattern as get_ptx_reg so the
    # CHECK lines keep matching when register numbers reach 10 or more.
    reg = get_ptx_reg("b32")
    return "{" + ", ".join([reg] * reg_count[geom_ptx]) + "}"


def get_ptx_surface(target):
    """Return the FileCheck pattern for the surface operand of `target`.

    Raises RuntimeError for an unknown target.
    """
    # With 'cuda' environment surface is copied with ld.param, so the
    # instruction uses a register. For 'nvcl' the instruction uses the
    # parameter directly.
    if target == "cuda":
        return "%rd{{[0-9]+}}"
    elif target == "nvcl":
        return "test_{{.*}}_param_0"
    raise RuntimeError("invalid target: " + target)


def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
    """Return the annotation metadata for a kernel taking a surface.

    The result is a list with at most one "rdwrimage" entry that marks
    parameter 0 of `fun_name`; it is empty for the 'cuda' target or
    when the kernel has no surface parameter.
    """
    metadata = []

    if target == "cuda":
        # When a parameter is lowered as a .surfref, it still has the
        # corresponding ld.param.u64, which is illegal. Do not emit the
        # metadata to keep the parameter as .b64 instead.
        has_surface_param = False

    if has_surface_param:
        md_surface = '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
        metadata.append(md_surface)

    return metadata


def gen_suld_tests(target, global_surf):
    """
    PTX spec s9.7.10.1. Surface Instructions:

    suld.b.geom{.cop}.vec.dtype.clamp  d, [a, b];  // unformatted

    .geom  = { .1d, .2d, .3d, .a1d, .a2d };
    .cop   = { .ca, .cg, .cs, .cv };               // cache operation
    .vec   = { none, .v2, .v4 };
    .dtype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };

    Prints one "_param" and one "_global" kernel per combination and
    returns (generated_items, generated_metadata), where each item is
    an (intrinsic, instruction) pair.
    """

    template = """
    declare ${retty} @${intrinsic}(i64 %s, ${access});

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
    ;
    define ptx_kernel void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
    define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
      %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
      %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    # FIXME: "cop" is missing
    for geom, vec, dtype, clamp in product(
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    ):

        if vec == "v4" and dtype == "b64":
            continue

        test_name = "test_suld_" + geom + vec + dtype + clamp

        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.suld.{geom}.{dtype}.{clamp}".format(
                geom=get_llvm_geom(geom),
                dtype=(vec + get_llvm_type(dtype)),
                clamp=clamp,
            ),
            "retty": get_llvm_vec_type(vec, dtype),
            "access": get_llvm_surface_access(geom),
            "global_surf": global_surf,
            "instruction": "suld.b.{geom}{vec}.{dtype}.{clamp}".format(
                geom=geom,
                vec=("" if vec == "" else "." + vec),
                dtype=dtype,
                clamp=clamp,
            ),
            "reg_ret": get_ptx_vec_reg(vec, dtype),
            "reg_surf": get_ptx_surface(target),
            "reg_access": get_ptx_surface_access(geom),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        fun_name = test_name + "_param"
        fun_ty = "void (i64, {retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_surface_access_type(geom)
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=True
        )

        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_surface_access_type(geom)
        )
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=False
        )

    return generated_items, generated_metadata


def gen_sust_tests(target, global_surf):
    """
    PTX spec s9.7.10.2. Surface Instructions

    sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp  [a, b], c;  // unformatted
    sust.p.{1d,2d,3d}.vec.b32.clamp          [a, b], c;  // formatted

    sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp   [a, b], c;  // unformatted

    .cop   = { .wb, .cg, .cs, .wt };                     // cache operation
    .vec   = { none, .v2, .v4 };
    .ctype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };

    Prints one "_param" and one "_global" kernel per combination and
    returns (generated_items, generated_metadata), where each item is
    an (intrinsic, instruction) pair.
    """

    template = """
    declare void @${intrinsic}(i64 %s, ${access}, ${value});

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
    ;
    define ptx_kernel void @${test_name}_param(i64 %s, ${value}, ${access}) {
      tail call void @${intrinsic}(i64 %s, ${access}, ${value})
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
    define ptx_kernel void @${test_name}_global(${value}, ${access}) {
      %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
      tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
      ret void
    }
    """

    generated_items = []
    generated_metadata = []
    # FIXME: "cop" is missing
    for fmt, geom, vec, ctype, clamp in product(
        ["b", "p"],
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    ):

        if fmt == "p" and geom[0] == "a":
            continue
        if fmt == "p" and ctype != "b32":
            continue
        if vec == "v4" and ctype == "b64":
            continue

        # FIXME: these intrinsics are missing, but at least one of them is
        # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
        if fmt == "p" and clamp != "trap":
            continue

        test_name = "test_sust_" + fmt + geom + vec + ctype + clamp

        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.sust.{fmt}.{geom}.{ctype}.{clamp}".format(
                fmt=fmt,
                geom=get_llvm_geom(geom),
                ctype=(vec + get_llvm_type(ctype)),
                clamp=clamp,
            ),
            "access": get_llvm_surface_access(geom),
            "value": get_llvm_value(vec, ctype),
            "global_surf": global_surf,
            "instruction": "sust.{fmt}.{geom}{vec}.{ctype}.{clamp}".format(
                fmt=fmt,
                geom=geom,
                vec=("" if vec == "" else "." + vec),
                ctype=ctype,
                clamp=clamp,
            ),
            "reg_value": get_ptx_vec_reg(vec, ctype),
            "reg_surf": get_ptx_surface(target),
            "reg_access": get_ptx_surface_access(geom),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        fun_name = test_name + "_param"
        fun_ty = "ptr"
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=True
        )

        fun_name = test_name + "_global"
        fun_ty = "ptr"
        generated_metadata += get_surface_metadata(
            target, fun_ty, fun_name, has_surface_param=False
        )

    return generated_items, generated_metadata


def is_unified(target):
    """
    PTX has two modes of operation. In the unified mode, texture and
    sampler information is accessed through a single .texref handle. In
    the independent mode, texture and sampler information each have their
    own handle, allowing them to be defined separately and combined at the
    site of usage in the program.

    Returns True when `target` is "cuda", which this generator treats
    as the unified mode.
    """
    return target == "cuda"


def get_llvm_texture_access(geom_ptx, ctype, mipmap):
    """Return the typed LLVM coordinate arguments of a texture fetch.

    `mipmap` is "", "level" or "grad"; "level" appends a level-of-detail
    argument and "grad" appends the dPdx/dPdy derivative arguments.
    """
    geom_access = {
        "1d": "{ctype} %x",
        "2d": "{ctype} %x, {ctype} %y",
        "3d": "{ctype} %x, {ctype} %y, {ctype} %z",
        "cube": "{ctype} %s, {ctype} %t, {ctype} %r",
        "a1d": "i32 %l, {ctype} %x",
        "a2d": "i32 %l, {ctype} %x, {ctype} %y",
        "acube": "i32 %l, {ctype} %s, {ctype} %t, {ctype} %r",
    }

    access = geom_access[geom_ptx]

    if mipmap == "level":
        access += ", {ctype} %lvl"
    elif mipmap == "grad":
        if geom_ptx in ("1d", "a1d"):
            access += ", {ctype} %dpdx1, {ctype} %dpdy1"
        elif geom_ptx in ("2d", "a2d"):
            access += (
                ", {ctype} %dpdx1, {ctype} %dpdx2" + ", {ctype} %dpdy1, {ctype} %dpdy2"
            )
        else:
            access += (
                ", {ctype} %dpdx1, {ctype} %dpdx2, {ctype} %dpdx3"
                + ", {ctype} %dpdy1, {ctype} %dpdy2, {ctype} %dpdy3"
            )

    return access.format(ctype=get_llvm_type(ctype))


def get_llvm_texture_access_type(geom_ptx, ctype, mipmap):
    """Return the LLVM types matching get_llvm_texture_access."""
    geom_access = {
        "1d": "{ctype}",
        "2d": "{ctype}, {ctype}",
        "3d": "{ctype}, {ctype}, {ctype}",
        "cube": "{ctype}, {ctype}, {ctype}",
        "a1d": "i32, {ctype}",
        "a2d": "i32, {ctype}, {ctype}",
        "acube": "i32, {ctype}, {ctype}, {ctype}",
    }

    access = geom_access[geom_ptx]

    if mipmap == "level":
        access += ", {ctype}"
    elif mipmap == "grad":
        if geom_ptx in ("1d", "a1d"):
            access += ", {ctype}, {ctype}"
        elif geom_ptx in ("2d", "a2d"):
            access += ", {ctype}, {ctype}, {ctype}, {ctype}"
        else:
            access += ", {ctype}, {ctype}, {ctype}" + ", {ctype}, {ctype}, {ctype}"

    return access.format(ctype=get_llvm_type(ctype))


def get_ptx_texture_access(geom_ptx, ctype):
    """Return the FileCheck pattern for the PTX texture coordinate tuple."""
    access_reg = {
        "1d": "{{{ctype_reg}}}",
        "2d": "{{{ctype_reg}, {ctype_reg}}}",
        "3d": "{{{ctype_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
        "a1d": "{{{b32_reg}, {ctype_reg}}}",
        "a2d": "{{{b32_reg}, {ctype_reg}, {ctype_reg}, {ctype_reg}}}",
        "cube": "{{{f32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
        "acube": "{{{b32_reg}, {f32_reg}, {f32_reg}, {f32_reg}}}",
    }
    return access_reg[geom_ptx].format(
        ctype_reg=get_ptx_reg(ctype),
        b32_reg=get_ptx_reg("b32"),
        f32_reg=get_ptx_reg("f32"),
    )


def get_ptx_texture(target):
    """Return the FileCheck pattern for the texture (and sampler) operand.

    Raises RuntimeError for an unknown target.
    """
    # With 'cuda' environment texture/sampler are copied with ld.param,
    # so the instruction uses registers. For 'nvcl' the instruction uses
    # texture/sampler parameters directly.
    if target == "cuda":
        return "%rd{{[0-9]+}}"
    elif target == "nvcl":
        return "test_{{.*}}_param_0, test_{{.*}}_param_1"
    raise RuntimeError("unknown target: " + target)


def get_llvm_global_sampler(target, global_sampler):
    """Return (sampler_argument, sampler_handle_instruction) for `target`.

    Both strings are empty in unified mode, where no separate sampler
    handle is passed.
    """
    if is_unified(target):
        return "", ""
    else:
        sampler_handle = "i64 %gs,"
        get_sampler_handle = (
            "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
            + "(i64 addrspace(1)* @{})".format(global_sampler)
        )
        return sampler_handle, get_sampler_handle


def get_ptx_global_sampler(target, global_sampler):
    """Return the PTX sampler operand prefix ("" in unified mode)."""
    if is_unified(target):
        return ""
    else:
        return global_sampler + ","


def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
    """Return the annotation metadata for a kernel taking a texture.

    For a kernel with texture parameters on a non-'cuda' target the
    result marks parameter 0 as "rdoimage" and, in non-unified mode,
    parameter 1 as "sampler". Otherwise the result is empty.
    """
    metadata = []

    if target == "cuda":
        # When a parameter is lowered as a .texref, it still has the
        # corresponding ld.param.u64, which is illegal. Do not emit the
        # metadata to keep the parameter as .b64 instead.
        has_texture_params = False

    if has_texture_params:
        md_texture = '!{{{fun_ty} @{fun_name}, !"rdoimage", i32 0}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
        metadata.append(md_texture)

        # The sampler annotation refers to parameter 1, which only the
        # "_param" kernels have, so it is tied to has_texture_params.
        if not is_unified(target):
            md_sampler = '!{{{fun_ty} @{fun_name}, !"sampler", i32 1}}'.format(
                fun_ty=fun_ty, fun_name=fun_name
            )
            metadata.append(md_sampler)

    return metadata


def gen_tex_tests(target, global_tex, global_sampler):
    """
    PTX spec s9.7.9.3. Texture Instructions

    tex.geom.v4.dtype.ctype  d, [a, c] {, e} {, f};
    tex.geom.v4.dtype.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    tex.geom.v2.f16x2.ctype  d[|p], [a, c] {, e} {, f};
    tex.geom.v2.f16x2.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    // mipmaps
    tex.base.geom.v4.dtype.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v4.dtype.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v4.dtype.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    tex.base.geom.v2.f16x2.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v2.f16x2.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v2.f16x2.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    .geom  = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
    .dtype = { .u32, .s32, .f16,  .f32 };
    .ctype = {       .s32, .f32 };          // .cube, .acube require .f32
                                            // .2dms, .a2dms require .s32

    Prints one "_param" and one "_global" kernel per supported
    combination and returns (generated_items, generated_metadata),
    where each item is an (intrinsic, instruction) pair.
    """

    template = """
    declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
    define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
    define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
      %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
      ${get_sampler_handle}
      %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    # The sampler strings depend only on the target, so compute them once.
    sampler_handle, get_sampler_handle = get_llvm_global_sampler(
        target, global_sampler
    )

    generated_items = []
    generated_metadata = []
    for mipmap, geom, vec, dtype, ctype in product(
        ["", "level", "grad"],
        ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
        ["v2", "v4"],
        ["u32", "s32", "f16", "f32"],
        ["s32", "f32"],
    ):

        # FIXME: missing intrinsics.
        # Multi-sample textures and multi-sample texture arrays
        # introduced in PTX ISA version 3.2.
        if geom in ("2dms", "a2dms"):
            continue

        # FIXME: missing intrinsics? no such restriction in the PTX spec
        if ctype == "s32" and mipmap != "":
            continue

        # FIXME: missing intrinsics?
        if ctype == "s32" and geom in ("cube", "acube"):
            continue

        # FIXME: missing intrinsics.
        # Support for textures returning f16 and f16x2 data introduced in
        # PTX ISA version 4.2.
        if vec == "v2" or dtype == "f16":
            continue

        # FIXME: missing intrinsics.
        # Support for tex.grad.{cube, acube} introduced in PTX ISA version
        # 4.3, currently supported only in unified mode.
        if not is_unified(target) and mipmap == "grad" and geom in ("cube", "acube"):
            continue

        # The instruction returns a two-element vector for destination
        # type f16x2. For all other destination types, the instruction
        # returns a four-element vector. Coordinates may be given in
        # either signed 32-bit integer or 32-bit floating point form.
        if vec == "v2" and dtype != "f16":
            continue

        test_name = "test_tex_" + "".join((mipmap, geom, vec, dtype, ctype))
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.tex{unified}.{geom}{mipmap}.{vec}{dtype}.{ctype}".format(
                unified=(".unified" if is_unified(target) else ""),
                geom=get_llvm_geom(geom),
                mipmap=("" if mipmap == "" else "." + mipmap),
                vec=vec,
                dtype=dtype,
                ctype=ctype,
            ),
            "global_tex": global_tex,
            "retty": get_llvm_vec_type(vec, dtype),
            "sampler": sampler_handle,
            "access": get_llvm_texture_access(geom, ctype, mipmap),
            "get_sampler_handle": get_sampler_handle,
            "instruction": "tex{mipmap}.{geom}.{vec}.{dtype}.{ctype}".format(
                mipmap=("" if mipmap == "" else "." + mipmap),
                geom=geom,
                vec=vec,
                dtype=dtype,
                ctype=ctype,
            ),
            "ptx_ret": get_ptx_vec_reg(vec, dtype),
            "ptx_tex": get_ptx_texture(target),
            "ptx_access": get_ptx_texture_access(geom, ctype),
            "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        fun_name = test_name + "_param"
        fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
            sampler=("" if is_unified(target) else "i64,"),
            retty=params["retty"],
            access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=True
        )

        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"],
            access_ty=get_llvm_texture_access_type(geom, ctype, mipmap),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=False
        )

    return generated_items, generated_metadata


def get_llvm_tld4_access(geom):
    """
    For 2D textures, operand c specifies coordinates as a two-element,
    32-bit floating-point vector.

    For 2d texture arrays operand c is a four element, 32-bit
    vector. The first element in operand c is interpreted as an unsigned
    integer index (.u32) into the texture array, and the next two
    elements are interpreted as 32-bit floating point coordinates of 2d
    texture. The fourth element is ignored.

    For cubemap textures, operand c specifies four-element vector which
    comprises three floating-point coordinates (s, t, r) and a fourth
    padding argument which is ignored.

    [For cube arrays] The first element in operand c is interpreted as
    an unsigned integer index (.u32) into the cubemap texture array, and
    the remaining three elements are interpreted as floating-point
    cubemap coordinates (s, t, r), used to lookup in the selected
    cubemap.

    Returns the typed LLVM coordinate arguments for `geom`.
    """
    geom_to_access = {
        "2d": "float %x, float %y",
        "a2d": "i32 %l, float %x, float %y",
        "cube": "float %s, float %t, float %r",
        "acube": "i32 %l, float %s, float %t, float %r",
    }
    return geom_to_access[geom]


def get_llvm_tld4_access_type(geom):
    """Return the LLVM coordinate argument types of a tld4 fetch."""
    geom_to_access = {
        "2d": "float, float",
        "a2d": "i32, float, float",
        "cube": "float, float, float",
        "acube": "i32, float, float, float",
    }
    return geom_to_access[geom]


def get_ptx_tld4_access(geom):
    """Return the FileCheck pattern for the PTX tld4 coordinate tuple."""
    geom_to_access = {
        "2d": "{%f{{[0-9]+}}, %f{{[0-9]+}}}",
        "a2d": "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
        "cube": "{%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
        "acube": "{%r{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}",
    }
    return geom_to_access[geom]


def gen_tld4_tests(target, global_tex, global_sampler):
    """
    PTX spec s9.7.9.4. Texture Instructions: tld4
    Perform a texture fetch of the 4-texel bilerp footprint.

    tld4.comp.2d.v4.dtype.f32    d[|p], [a, c] {, e} {, f};
    tld4.comp.geom.v4.dtype.f32  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    .comp  = { .r, .g, .b, .a };
    .geom  = { .2d, .a2d, .cube, .acube };
    .dtype = { .u32, .s32, .f32 };

    Prints one "_param" and one "_global" kernel per supported
    combination and returns (generated_items, generated_metadata),
    where each item is an (intrinsic, instruction) pair.
    """

    template = """
    declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

    ; CHECK-LABEL: .entry ${test_name}_param
    ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
    define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
      %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    ; CHECK-LABEL: .entry ${test_name}_global
    ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
    define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
      %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
      ${get_sampler_handle}
      %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
      store ${retty} %val, ${retty}* %ret
      ret void
    }
    """

    # The sampler strings depend only on the target, so compute them once.
    sampler_handle, get_sampler_handle = get_llvm_global_sampler(
        target, global_sampler
    )

    generated_items = []
    generated_metadata = []
    for comp, geom, dtype in product(
        ["r", "g", "b", "a"], ["2d", "a2d", "cube", "acube"], ["u32", "s32", "f32"]
    ):

        # FIXME: missing intrinsics.
        # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
        if geom in ("a2d", "cube", "acube"):
            continue

        test_name = "test_tld4_" + "".join((comp, geom, dtype))
        params = {
            "test_name": test_name,
            "intrinsic": "llvm.nvvm.tld4{unified}.{comp}.{geom}.v4{dtype}.f32".format(
                unified=(".unified" if is_unified(target) else ""),
                comp=comp,
                geom=get_llvm_geom(geom),
                dtype=dtype,
            ),
            "global_tex": global_tex,
            "retty": get_llvm_vec_type("v4", dtype),
            "sampler": sampler_handle,
            "access": get_llvm_tld4_access(geom),
            "get_sampler_handle": get_sampler_handle,
            "instruction": "tld4.{comp}.{geom}.v4.{dtype}.f32".format(
                comp=comp, geom=geom, dtype=dtype
            ),
            "ptx_ret": get_ptx_vec_reg("v4", dtype),
            "ptx_tex": get_ptx_texture(target),
            "ptx_access": get_ptx_tld4_access(geom),
            "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
        }
        gen_test(template, params)
        generated_items.append((params["intrinsic"], params["instruction"]))

        fun_name = test_name + "_param"
        fun_ty = "void (i64, {sampler} {retty}*, {access_ty})*".format(
            sampler=("" if is_unified(target) else "i64,"),
            retty=params["retty"],
            access_ty=get_llvm_tld4_access_type(geom),
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=True
        )

        fun_name = test_name + "_global"
        fun_ty = "void ({retty}*, {access_ty})*".format(
            retty=params["retty"], access_ty=get_llvm_tld4_access_type(geom)
        )
        generated_metadata += get_texture_metadata(
            target, fun_ty, fun_name, has_texture_params=False
        )

    return generated_items, generated_metadata


def gen_test(template, params):
    """Print `template` with every ${name} replaced from `params`.

    The template is dedented first. When the module-level `debug` flag
    is set, each parameter is printed before the test as a ";; name:
    value" comment line. Raises KeyError if the template references a
    name missing from `params`.
    """
    # `debug` is assigned by the command-line entry point; treat it as
    # off when this function is reached before that has happened.
    if globals().get("debug", False):
        print()
        for param, value in params.items():
            print(";; {}: {}".format(param, value))

    print(string.Template(textwrap.dedent(template)).substitute(params))

def gen_tests(target, tests):
    """Print the complete test file for `target`.

    `tests` is a collection of test-group names; the groups "suld",
    "sust", "tex" and "tld4" are generated when present. Returns the
    list of generated (intrinsic, instruction) pairs.
    """
    gen_triple(target)

    items = []
    metadata = []

    global_surf = "gsurf"
    global_tex = "gtex"
    global_sampler = "gsam"
    metadata += gen_globals(target, global_surf, global_tex, global_sampler)

    if "suld" in tests:
        suld_items, suld_md = gen_suld_tests(target, global_surf)
        items += suld_items
        metadata += suld_md
    if "sust" in tests:
        sust_items, sust_md = gen_sust_tests(target, global_surf)
        items += sust_items
        metadata += sust_md
    if "tex" in tests:
        tex_items, tex_md = gen_tex_tests(target, global_tex, global_sampler)
        items += tex_items
        metadata += tex_md
    if "tld4" in tests:
        tld4_items, tld4_md = gen_tld4_tests(target, global_tex, global_sampler)
        items += tld4_items
        metadata += tld4_md

    gen_metadata(metadata)
    return items


def write_gen_list(filename, append, items):
    """Write one "intrinsic instruction" line per item to `filename`.

    The file is appended to when `append` is true and overwritten
    otherwise.
    """
    with open(filename, ("a" if append else "w")) as f:
        for intrinsic, instruction in items:
            f.write("{} {}\n".format(intrinsic, instruction))


def read_gen_list(filename):
    """Read a file written by write_gen_list.

    Returns a tuple (intrinsics, instructions) of two sets. Blank lines
    are ignored.
    """
    intrinsics = set()
    instructions = set()
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            intrinsic, instruction = fields
            intrinsics.add(intrinsic)
            instructions.add(instruction)
    return (intrinsics, instructions)


def read_td_list(filename, regex):
    """Collect group 1 of the first `regex` match on each line of `filename`.

    Returns the set of collected names. Raises RuntimeError when fewer
    than 30 names are found, which suggests the pattern no longer
    matches the TableGen file.
    """
    td_list = set()
    with open(filename) as f:
        for line in f:
            match = re.search(regex, line)
            if match:
                td_list.add(match.group(1))

    # Arbitrary value - we should find quite a lot of instructions
    if len(td_list) < 30:
        raise RuntimeError(
            "found only {} instructions in {}".format(len(td_list), filename)
        )

    return td_list


def _raise_if_untested(td_names, tested_names):
    """Raise RuntimeError naming every TableGen entry that is not tested."""
    missing = sorted(set(td_names) - set(tested_names))
    if missing:
        raise RuntimeError(
            "{} is present in tablegen, but not tested.\n".format(", ".join(missing))
        )


def verify_inst_tablegen(path_td, gen_instr):
    """
    Verify that all instructions defined in NVPTXIntrinsics.td are
    tested.

    `gen_instr` is the set of tested instruction names; it is not
    modified. Raises RuntimeError if an instruction found in `path_td`
    is neither tested nor in the list of known untested ones.
    """

    td_instr = read_td_list(path_td, '"((suld|sust|tex|tld4)\\..*)"')

    known_untested = set()
    # FIXME: spec does not list any sust.p variants other than b32
    for geom, vec, ty in product(
        ["1d", "a1d", "2d", "a2d", "3d"], ["", ".v2", ".v4"], ["b8", "b16"]
    ):
        known_untested.add("sust.p.{}{}.{}.trap".format(geom, vec, ty))
    # FIXME: sust.p is also not supported for arrays
    for geom, vec in product(["a1d", "a2d"], ["", ".v2", ".v4"]):
        known_untested.add("sust.p.{}{}.b32.trap".format(geom, vec))

    _raise_if_untested(td_instr, set(gen_instr) | known_untested)


def verify_llvm_tablegen(path_td, gen_intr):
    """
    Verify that all intrinsics defined in IntrinsicsNVVM.td are
    tested.

    `gen_intr` is the set of tested intrinsic names; it is not
    modified. Raises RuntimeError if an intrinsic found in `path_td`
    is neither tested nor in the list of known untested ones.
    """

    td_intr = read_td_list(path_td, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')

    known_untested = set()
    # FIXME: spec does not list any sust.p variants other than b32
    for geom, vec, ty in product(
        ["1d", "1d.array", "2d", "2d.array", "3d"], ["", "v2", "v4"], ["i8", "i16"]
    ):
        known_untested.add("llvm.nvvm.sust.p.{}.{}{}.trap".format(geom, vec, ty))
    # FIXME: sust.p is also not supported for arrays
    for geom, vec in product(["1d.array", "2d.array"], ["", "v2", "v4"]):
        known_untested.add("llvm.nvvm.sust.p.{}.{}i32.trap".format(geom, vec))

    _raise_if_untested(td_intr, set(gen_intr) | known_untested)


# Read by gen_test; set from the --debug command-line flag in main().
debug = False


def main(argv=None):
    """Parse the command line and either generate tests or verify coverage.

    `argv` is the argument list to parse; None means sys.argv[1:].
    """
    global debug

    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--tests", type=str)
    parser.add_argument("--target", type=str)
    parser.add_argument("--gen-list", dest="gen_list", type=str)
    parser.add_argument("--gen-list-append", dest="gen_list_append", action="store_true")
    parser.add_argument("--verify", action="store_true")
    parser.add_argument("--llvm-tablegen", dest="llvm_td", type=str)
    parser.add_argument("--inst-tablegen", dest="inst_td", type=str)

    args = parser.parse_args(argv)
    debug = args.debug

    if args.verify:
        if not (args.gen_list and args.inst_td and args.llvm_td):
            parser.error(
                "--verify requires --gen-list, --inst-tablegen and --llvm-tablegen"
            )
        intrinsics, instructions = read_gen_list(args.gen_list)
        verify_inst_tablegen(args.inst_td, instructions)
        verify_llvm_tablegen(args.llvm_td, intrinsics)
    else:
        if args.target is None or args.tests is None:
            parser.error("--target and --tests are required unless --verify is given")
        items = gen_tests(args.target, args.tests.split(","))
        if args.gen_list:
            write_gen_list(args.gen_list, args.gen_list_append, items)


if __name__ == "__main__":
    main()