xref: /llvm-project/llvm/test/CodeGen/NVPTX/surf-tex.py (revision 4583f6d3443c8dc6605c868724e3743161954210)
1# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
3# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
4
# We only need to run this a second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
7#
8# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
10# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
11
12# Verify that all instructions and intrinsics defined in TableGen
13# files are tested. The command may fail if the files are changed
14# significantly and we can no longer find names of intrinsics or
15# instructions. In that case we can replace this command with a
16# reference list.
17#
18# Verification is turned off by default to avoid issues when the LLVM
19# source directory is not available.
20#
21# RUN-DISABLED:  %python %s --verify --gen-list=%t.list --llvm-tablegen=%S/../../../include/llvm/IR/IntrinsicsNVVM.td  --inst-tablegen=%S/../../../lib/Target/NVPTX/NVPTXIntrinsics.td
22
23from __future__ import print_function
24
25import argparse
26import re
27import string
28import textwrap
29from itertools import product
30
31
def get_llvm_geom(geom_ptx):
    """Translate a PTX geometry suffix into its LLVM intrinsic-name spelling.

    Non-array geometries keep their name; the array forms "a1d", "a2d" and
    "acube" become "1d.array", "2d.array" and "cube.array". Any other
    value raises KeyError.
    """
    llvm_names = {name: name for name in ("1d", "2d", "3d", "cube")}
    # Array geometries are spelled "<base>.array" on the LLVM side.
    for base in ("1d", "2d", "cube"):
        llvm_names["a" + base] = base + ".array"
    return llvm_names[geom_ptx]
43
44
def get_ptx_reg(ty):
    """Return the FileCheck pattern matching one PTX register of type `ty`.

    The pattern is "%<prefix>" followed by a FileCheck regex for the
    register number. Raises KeyError for a type that has no entry.
    """
    prefix_by_type = {
        "b8": "rs",
        "b16": "rs",
        "b32": "r",
        "b64": "rd",
        "f32": "f",
        "u32": "r",
        "s32": "r",
    }
    return "%" + prefix_by_type[ty] + "{{[0-9]+}}"
56
57
def get_ptx_vec_reg(vec, ty):
    """Return the FileCheck pattern for a brace-enclosed PTX register list.

    `vec` selects the number of registers ("" -> 1, "v2" -> 2, "v4" -> 4)
    and `ty` the register class, via get_ptx_reg(). Raises KeyError for an
    unknown `vec` or `ty`.
    """
    # Resolve the width first so an unknown `vec` is reported before `ty`.
    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    reg = get_ptx_reg(ty)
    return "{" + ", ".join([reg] * lanes) + "}"
65
66
def get_llvm_type(ty):
    """Return the LLVM scalar type for the PTX type name `ty`.

    "b<N>", "s<N>" and "u<N>" map to "i<N>"; "f16" maps to "half" and
    "f32" to "float".

    Raises RuntimeError("invalid type: <ty>") for anything else. That
    includes the empty string (which used to escape as IndexError) and
    integer spellings whose width is not a decimal number, such as "b".
    """
    # Slicing instead of indexing keeps the empty string on the
    # RuntimeError path below.
    if ty[:1] in ("b", "s", "u") and ty[1:].isdigit():
        return "i" + ty[1:]
    if ty == "f16":
        return "half"
    if ty == "f32":
        return "float"
    raise RuntimeError("invalid type: " + ty)
75
76
def get_llvm_vec_type(vec, ty_ptx):
    """Return the LLVM type for a scalar or a 2-/4-element aggregate.

    With `vec` == "" the element type itself is returned; "v2" and "v4"
    yield a literal struct "{ T, T }" / "{ T, T, T, T }". Raises KeyError
    for any other `vec`.
    """
    elem = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem == "i8":
        elem = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    if lanes == 1:
        return elem
    return "{ " + ", ".join([elem] * lanes) + " }"
90
91
def get_llvm_value(vec, ty_ptx):
    """Return the typed LLVM operands "%v1" .. "%vN" as one argument list.

    N is 1, 2 or 4 for `vec` == "", "v2" or "v4"; any other `vec` raises
    KeyError.
    """
    elem = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem == "i8":
        elem = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join(
        "{} %v{}".format(elem, index) for index in range(1, lanes + 1)
    )
105
106
def get_llvm_value_type(vec, ty_ptx):
    """Return the element type repeated once per value operand.

    This is the type-only counterpart of get_llvm_value(): 1, 2 or 4
    comma-separated types for `vec` == "", "v2" or "v4". Any other `vec`
    raises KeyError.
    """
    elem = get_llvm_type(ty_ptx)

    # i8 is passed as i16, same as in PTX
    if elem == "i8":
        elem = "i16"

    lanes = {"": 1, "v2": 2, "v4": 4}[vec]
    return ", ".join([elem] * lanes)
116
117
# Last identifier handed out by get_table_gen_id(); 0 means none yet.
id_counter = 0


def get_table_gen_id():
    """Return the next identifier from the module-wide counter.

    The first call returns 1; each later call returns one more than the
    previous call.
    """
    global id_counter
    id_counter = id_counter + 1
    return id_counter
125
126
def gen_triple(target):
    """Print the `target triple` line for `target`, then a blank line.

    Raises RuntimeError when `target` is neither "cuda" nor "nvcl".
    """
    if target not in ("cuda", "nvcl"):
        raise RuntimeError("invalid target: " + target)
    # The target name doubles as the OS component of the triple.
    print('target triple = "nvptx64-unknown-{}"\n'.format(target))
134
135
def gen_globals(target, surf_name, tex_name, sampler_name):
    """Print the handle intrinsic declaration and the global handles.

    Always prints a surface and a texture global together with their
    CHECK lines; a sampler global is printed as well when the target is
    not unified. Returns the list of annotation strings describing the
    globals that were printed.
    """
    global_fmt = "@{} = internal addrspace(1) global i64 {}, align 8"
    annotation_fmt = '!{{i64 addrspace(1)* @{}, !"{}", i32 1}}'

    print("declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)")
    print("; CHECK: .global .surfref {}".format(surf_name))
    print("; CHECK: .global .texref {}".format(tex_name))
    print(global_fmt.format(surf_name, 0))
    print(global_fmt.format(tex_name, 1))
    annotations = [
        annotation_fmt.format(surf_name, "surface"),
        annotation_fmt.format(tex_name, "texture"),
    ]

    if is_unified(target):
        return annotations

    # Only the non-unified mode has a separate sampler handle.
    print("; CHECK: .global .samplerref {}".format(sampler_name))
    print(global_fmt.format(sampler_name, 1))
    annotations.append(annotation_fmt.format(sampler_name, "sampler"))
    return annotations
155
156
def gen_metadata(metadata):
    """Print the `!nvvm.annotations` node followed by its numbered entries.

    Entry i of `metadata` is printed as "!i = <entry>", and the
    annotations node lists !0 .. !(len(metadata) - 1).
    """
    node_refs = ", ".join("!" + str(index) for index in range(len(metadata)))
    print("!nvvm.annotations = !{" + node_refs + "}")
    for index, node in enumerate(metadata):
        print("!{} = {}".format(index, node))
162
163
def get_llvm_surface_access(geom_ptx):
    """Return the i32 coordinate operands for a surface of `geom_ptx`.

    Array geometries list the layer operand %l before the coordinates.
    Raises KeyError for a geometry without an entry.
    """
    operand_names = {
        "1d": ("x",),
        "2d": ("x", "y"),
        "3d": ("x", "y", "z"),
        "a1d": ("l", "x"),
        "a2d": ("l", "x", "y"),
    }
    return ", ".join("i32 %" + name for name in operand_names[geom_ptx])
173
174
def get_llvm_surface_access_type(geom_ptx):
    """Return the operand types matching get_llvm_surface_access().

    The result is "i32" repeated once per operand. Raises KeyError for a
    geometry without an entry.
    """
    operand_count = {"1d": 1, "2d": 2, "3d": 3, "a1d": 2, "a2d": 3}
    return ", ".join(["i32"] * operand_count[geom_ptx])
184
185
def get_ptx_surface_access(geom_ptx):
    """
    Return the FileCheck pattern for the coordinate operand of a surface
    instruction with geometry `geom_ptx`.

    From the PTX spec:

    Operand b is a scalar or singleton tuple for 1d surfaces; is a
    two-element vector for 2d surfaces; and is a four-element vector
    for 3d surfaces, where the fourth element is ignored. Coordinate
    elements are of type .s32.

    For 1d surface arrays, operand b has type .v2.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the second element is interpreted as a 1d
    surface coordinate of type .s32.

    For 2d surface arrays, operand b has type .v4.b32. The first
    element is interpreted as an unsigned integer index (.u32) into
    the surface array, and the next two elements are interpreted as 2d
    surface coordinates of type .s32. The fourth element is ignored.

    Each register number is matched with `[0-9]+`, like the other
    register patterns in this file, so the check still matches when a
    register number has more than one digit.

    Raises KeyError for a geometry without an entry.
    """
    reg = "%r{{[0-9]+}}"
    # Number of registers in the operand; 3d and a2d are padded to four.
    operand_count = {"1d": 1, "2d": 2, "3d": 4, "a1d": 2, "a2d": 4}
    return "{" + ", ".join([reg] * operand_count[geom_ptx]) + "}"
211
212
def get_ptx_surface(target):
    """Return the FileCheck pattern for the surface operand on `target`.

    Raises RuntimeError when `target` is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment surface is copied with ld.param, so the
    # instruction uses a register. For 'nvcl' the instruction uses the
    # parameter directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("invalid target: " + target)
222
223
def get_surface_metadata(target, fun_ty, fun_name, has_surface_param):
    """Return the annotation strings for one generated surface test function.

    The result holds a single "rdwrimage" annotation for parameter 0 when
    the function takes a surface parameter and the target is not "cuda";
    otherwise it is an empty list.
    """
    # When a parameter is lowered as a .surfref, it still has the
    # corresponding ld.param.u64, which is illegal. Do not emit the
    # metadata to keep the parameter as .b64 instead.
    if target == "cuda" or not has_surface_param:
        return []

    return [
        '!{{{fun_ty} @{fun_name}, !"rdwrimage", i32 0}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
    ]
240
241
def gen_suld_tests(target, global_surf):
    """
    Print two kernels for every generated suld variant: one taking the
    surface as a parameter and one reading it from the global
    `global_surf`.

    Returns (items, metadata). `items` has one
    (intrinsic name, PTX instruction) tuple per variant; `metadata` is the
    list of annotation strings collected from get_surface_metadata().

    PTX spec s9.7.10.1. Surface Instructions:

    suld.b.geom{.cop}.vec.dtype.clamp  d, [a, b];  // unformatted

    .geom  = { .1d, .2d, .3d, .a1d, .a2d };
    .cop   = { .ca, .cg, .cs, .cv };               // cache operation
    .vec   = { none, .v2, .v4 };
    .dtype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %s, ${access});

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}]
  ;
  define ptx_kernel void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %s, ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
  define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
    %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
    %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    # The surface operand depends only on the target, so compute it once.
    surf_operand = get_ptx_surface(target)

    items = []
    annotations = []
    # FIXME: "cop" is missing
    variants = product(
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    )
    for geom, vec, dtype, clamp in variants:
        # The .v4.b64 combination is not generated.
        if (vec, dtype) == ("v4", "b64"):
            continue

        name = "test_suld_{}{}{}{}".format(geom, vec, dtype, clamp)
        retty = get_llvm_vec_type(vec, dtype)
        access_ty = get_llvm_surface_access_type(geom)
        intrinsic = "llvm.nvvm.suld.{}.{}{}.{}".format(
            get_llvm_geom(geom), vec, get_llvm_type(dtype), clamp
        )
        instruction = "suld.b.{}{}.{}.{}".format(
            geom, "." + vec if vec else "", dtype, clamp
        )

        # Key order matters: gen_test() lists the parameters in this
        # order when debugging is enabled.
        gen_test(
            template,
            {
                "test_name": name,
                "intrinsic": intrinsic,
                "retty": retty,
                "access": get_llvm_surface_access(geom),
                "global_surf": global_surf,
                "instruction": instruction,
                "reg_ret": get_ptx_vec_reg(vec, dtype),
                "reg_surf": surf_operand,
                "reg_access": get_ptx_surface_access(geom),
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the surface handle as its first parameter.
        annotations.extend(
            get_surface_metadata(
                target,
                "void (i64, {}*, {})*".format(retty, access_ty),
                name + "_param",
                has_surface_param=True,
            )
        )
        # Kernel that reads the handle from the global instead.
        annotations.extend(
            get_surface_metadata(
                target,
                "void ({}*, {})*".format(retty, access_ty),
                name + "_global",
                has_surface_param=False,
            )
        )

    return items, annotations
331
332
def gen_sust_tests(target, global_surf):
    """
    Print two kernels for every generated sust variant: one taking the
    surface as a parameter and one reading it from the global
    `global_surf`.

    Returns (items, metadata). `items` has one
    (intrinsic name, PTX instruction) tuple per variant; `metadata` is the
    list of annotation strings collected from get_surface_metadata().

    PTX spec s9.7.10.2. Surface Instructions

    sust.b.{1d,2d,3d}{.cop}.vec.ctype.clamp  [a, b], c;  // unformatted
    sust.p.{1d,2d,3d}.vec.b32.clamp          [a, b], c;  // formatted

    sust.b.{a1d,a2d}{.cop}.vec.ctype.clamp   [a, b], c;  // unformatted

    .cop   = { .wb, .cg, .cs, .wt };                     // cache operation
    .vec   = { none, .v2, .v4 };
    .ctype = { .b8 , .b16, .b32, .b64 };
    .clamp = { .trap, .clamp, .zero };
    """

    template = """
  declare void @${intrinsic}(i64 %s, ${access}, ${value});

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value}
  ;
  define ptx_kernel void @${test_name}_param(i64 %s, ${value}, ${access}) {
    tail call void @${intrinsic}(i64 %s, ${access}, ${value})
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
  define ptx_kernel void @${test_name}_global(${value}, ${access}) {
    %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
    tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
    ret void
  }
  """

    # The surface operand depends only on the target, so compute it once.
    surf_operand = get_ptx_surface(target)

    # NOTE(review): the annotation type is spelled "ptr" here, while
    # gen_suld_tests builds a typed function-pointer type -- confirm which
    # spelling the IR parser in use expects.
    fun_ty = "ptr"

    items = []
    annotations = []
    # FIXME: "cop" is missing
    variants = product(
        ["b", "p"],
        ["1d", "2d", "3d", "a1d", "a2d"],
        ["", "v2", "v4"],
        ["b8", "b16", "b32", "b64"],
        ["trap", "clamp", "zero"],
    )
    for fmt, geom, vec, ctype, clamp in variants:
        # The .v4.b64 combination is not generated.
        if vec == "v4" and ctype == "b64":
            continue

        # Formatted stores are generated for non-array geometries and
        # .b32 only.
        #
        # FIXME: these intrinsics are missing, but at least one of them is
        # listed in the PTX spec: sust.p.{1d,2d,3d}.vec.b32.clamp
        # (that is the reason for the clamp != "trap" exclusion).
        if fmt == "p" and (
            geom.startswith("a") or ctype != "b32" or clamp != "trap"
        ):
            continue

        name = "test_sust_{}{}{}{}{}".format(fmt, geom, vec, ctype, clamp)
        intrinsic = "llvm.nvvm.sust.{}.{}.{}{}.{}".format(
            fmt, get_llvm_geom(geom), vec, get_llvm_type(ctype), clamp
        )
        instruction = "sust.{}.{}{}.{}.{}".format(
            fmt, geom, "." + vec if vec else "", ctype, clamp
        )

        # Key order matters: gen_test() lists the parameters in this
        # order when debugging is enabled.
        gen_test(
            template,
            {
                "test_name": name,
                "intrinsic": intrinsic,
                "access": get_llvm_surface_access(geom),
                "value": get_llvm_value(vec, ctype),
                "global_surf": global_surf,
                "instruction": instruction,
                "reg_value": get_ptx_vec_reg(vec, ctype),
                "reg_surf": surf_operand,
                "reg_access": get_ptx_surface_access(geom),
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the surface handle as its first parameter.
        annotations.extend(
            get_surface_metadata(
                target, fun_ty, name + "_param", has_surface_param=True
            )
        )
        # Kernel that reads the handle from the global instead.
        annotations.extend(
            get_surface_metadata(
                target, fun_ty, name + "_global", has_surface_param=False
            )
        )

    return items, annotations
430
431
def is_unified(target):
    """
    Tell whether `target` uses the unified texture mode.

    PTX has two modes of operation. In the unified mode, texture and
    sampler information is accessed through a single .texref handle. In
    the independent mode, texture and sampler information each have their
    own handle, allowing them to be defined separately and combined at the
    site of usage in the program.

    Only the "cuda" target is treated as unified.
    """
    if target == "cuda":
        return True
    return False
442
443
def get_llvm_texture_access(geom_ptx, ctype, mipmap):
    """Return the LLVM operand list used to address a texture.

    The list holds the layer index `i32 %l` for array geometries, then the
    coordinates, then `%lvl` for mipmap == "level" or the `%dpdx*` and
    `%dpdy*` operands for mipmap == "grad". Everything except the layer
    index has the LLVM type of `ctype`. Raises KeyError for an unknown
    geometry.
    """
    coords_by_geom = {
        "1d": ("x",),
        "2d": ("x", "y"),
        "3d": ("x", "y", "z"),
        "cube": ("s", "t", "r"),
        "a1d": ("x",),
        "a2d": ("x", "y"),
        "acube": ("s", "t", "r"),
    }
    names = list(coords_by_geom[geom_ptx])

    if mipmap == "level":
        names.append("lvl")
    elif mipmap == "grad":
        # One dPdx and one dPdy component per coordinate.
        dims = len(coords_by_geom[geom_ptx])
        names += ["dpdx{}".format(i) for i in range(1, dims + 1)]
        names += ["dpdy{}".format(i) for i in range(1, dims + 1)]

    llvm_ty = get_llvm_type(ctype)
    operands = ["{} %{}".format(llvm_ty, name) for name in names]
    # The array geometries are exactly the keys that start with "a".
    if geom_ptx.startswith("a"):
        operands.insert(0, "i32 %l")
    return ", ".join(operands)
473
474
def get_llvm_texture_access_type(geom_ptx, ctype, mipmap):
    """Return the operand types matching get_llvm_texture_access().

    The list is `i32` for the layer index of array geometries, followed by
    the LLVM type of `ctype` once per coordinate and once per extra mipmap
    operand. Raises KeyError for an unknown geometry.
    """
    coord_count = {
        "1d": 1,
        "2d": 2,
        "3d": 3,
        "cube": 3,
        "a1d": 1,
        "a2d": 2,
        "acube": 3,
    }
    dims = coord_count[geom_ptx]

    if mipmap == "level":
        extra = 1
    elif mipmap == "grad":
        # One dPdx and one dPdy component per coordinate.
        extra = 2 * dims
    else:
        extra = 0

    llvm_ty = get_llvm_type(ctype)
    # The array geometries are exactly the keys that start with "a".
    types = ["i32"] if geom_ptx.startswith("a") else []
    types += [llvm_ty] * (dims + extra)
    return ", ".join(types)
499
500
def get_ptx_texture_access(geom_ptx, ctype):
    """Return the FileCheck pattern for a texture coordinate operand.

    Each slot of the brace-enclosed register list is a coordinate register
    of type `ctype`, a b32 register (first slot of the array geometries)
    or an f32 register (cube geometries). Raises KeyError for an unknown
    geometry or `ctype`.
    """
    slots_by_geom = {
        "1d": ("coord",),
        "2d": ("coord", "coord"),
        "3d": ("coord", "coord", "coord", "coord"),
        "a1d": ("index", "coord"),
        "a2d": ("index", "coord", "coord", "coord"),
        "cube": ("f32", "f32", "f32", "f32"),
        "acube": ("index", "f32", "f32", "f32"),
    }
    slots = slots_by_geom[geom_ptx]
    # All three patterns are resolved up front, so an unsupported `ctype`
    # raises even for geometries that do not use it.
    reg_by_slot = {
        "coord": get_ptx_reg(ctype),
        "index": get_ptx_reg("b32"),
        "f32": get_ptx_reg("f32"),
    }
    return "{" + ", ".join(reg_by_slot[slot] for slot in slots) + "}"
516
517
def get_ptx_texture(target):
    """Return the FileCheck pattern for the texture operand(s) on `target`.

    Raises RuntimeError when `target` is neither "cuda" nor "nvcl".
    """
    # With 'cuda' environment texture/sampler are copied with ld.param,
    # so the instruction uses registers. For 'nvcl' the instruction uses
    # texture/sampler parameters directly.
    operand_by_target = {
        "cuda": "%rd{{[0-9]+}}",
        "nvcl": "test_{{.*}}_param_0, test_{{.*}}_param_1",
    }
    if target in operand_by_target:
        return operand_by_target[target]
    raise RuntimeError("unknown target: " + target)
527
528
def get_llvm_global_sampler(target, global_sampler):
    """Return (sampler argument, IR line defining it) for the global tests.

    Both strings are empty for a unified target. Otherwise the first is
    the extra `i64 %gs,` argument and the second the call that loads the
    handle of `global_sampler` into %gs.
    """
    if is_unified(target):
        return "", ""

    load_handle = (
        "%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64"
        "(i64 addrspace(1)* @" + global_sampler + ")"
    )
    return "i64 %gs,", load_handle
539
540
def get_ptx_global_sampler(target, global_sampler):
    """Return the sampler operand text for the PTX CHECK line.

    The result is empty for a unified target, otherwise the name of the
    global sampler followed by a comma.
    """
    return "" if is_unified(target) else global_sampler + ","
546
547
def get_texture_metadata(target, fun_ty, fun_name, has_texture_params):
    """Return the annotation strings for one generated texture test function.

    The result is empty when the function has no texture parameters or the
    target is "cuda". Otherwise it holds an "rdoimage" annotation for
    parameter 0 and, for a non-unified target, a "sampler" annotation for
    parameter 1.
    """
    # When a parameter is lowered as a .texref, it still has the
    # corresponding ld.param.u64, which is illegal. Do not emit the
    # metadata to keep the parameter as .b64 instead.
    if target == "cuda" or not has_texture_params:
        return []

    annotations = [
        '!{{{fun_ty} @{fun_name}, !"rdoimage", i32 0}}'.format(
            fun_ty=fun_ty, fun_name=fun_name
        )
    ]
    if not is_unified(target):
        annotations.append(
            '!{{{fun_ty} @{fun_name}, !"sampler", i32 1}}'.format(
                fun_ty=fun_ty, fun_name=fun_name
            )
        )
    return annotations
570
571
def gen_tex_tests(target, global_tex, global_sampler):
    """
    Print two kernels for every generated tex variant: one taking the
    texture (and sampler) as parameters and one reading them from the
    globals `global_tex` / `global_sampler`.

    Returns (items, metadata). `items` has one
    (intrinsic name, PTX instruction) tuple per variant; `metadata` is the
    list of annotation strings collected from get_texture_metadata().

    PTX spec s9.7.9.3. Texture Instructions

    tex.geom.v4.dtype.ctype  d, [a, c] {, e} {, f};
    tex.geom.v4.dtype.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    tex.geom.v2.f16x2.ctype  d[|p], [a, c] {, e} {, f};
    tex.geom.v2.f16x2.ctype  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    // mipmaps
    tex.base.geom.v4.dtype.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v4.dtype.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v4.dtype.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    tex.base.geom.v2.f16x2.ctype   d[|p], [a, {b,} c] {, e} {, f};
    tex.level.geom.v2.f16x2.ctype  d[|p], [a, {b,} c], lod {, e} {, f};
    tex.grad.geom.v2.f16x2.ctype   d[|p], [a, {b,} c], dPdx, dPdy {, e} {, f};

    .geom  = { .1d, .2d, .3d, .a1d, .a2d, .cube, .acube, .2dms, .a2dms };
    .dtype = { .u32, .s32, .f16,  .f32 };
    .ctype = {       .s32, .f32 };          // .cube, .acube require .f32
                                            // .2dms, .a2dms require .s32
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
  define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
  define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
    %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
    ${get_sampler_handle}
    %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    # Everything below depends only on the target, so compute it once.
    unified = is_unified(target)
    unified_infix = ".unified" if unified else ""
    sampler_param_ty = "" if unified else "i64,"
    sampler_arg, load_sampler = get_llvm_global_sampler(target, global_sampler)
    tex_operand = get_ptx_texture(target)
    global_sampler_operand = get_ptx_global_sampler(target, global_sampler)

    cube_geoms = ("cube", "acube")

    items = []
    annotations = []
    variants = product(
        ["", "level", "grad"],
        ["1d", "2d", "3d", "a1d", "a2d", "cube", "acube", "2dms", "a2dms"],
        ["v2", "v4"],
        ["u32", "s32", "f16", "f32"],
        ["s32", "f32"],
    )
    for mipmap, geom, vec, dtype, ctype in variants:
        # FIXME: missing intrinsics.
        # Multi-sample textures and multi-sample texture arrays
        # introduced in PTX ISA version 3.2.
        if geom in ("2dms", "a2dms"):
            continue

        if ctype == "s32":
            # FIXME: missing intrinsics? no such restriction in the PTX spec
            if mipmap != "":
                continue
            # FIXME: missing intrinsics?
            if geom in cube_geoms:
                continue

        # FIXME: missing intrinsics.
        # Support for textures returning f16 and f16x2 data introduced in
        # PTX ISA version 4.2.
        if vec == "v2" or dtype == "f16":
            continue

        # FIXME: missing intrinsics.
        # Support for tex.grad.{cube, acube} introduced in PTX ISA version
        # 4.3, currently supported only in unified mode.
        if not unified and mipmap == "grad" and geom in cube_geoms:
            continue

        # The instruction returns a two-element vector for destination
        # type f16x2. For all other destination types, the instruction
        # returns a four-element vector. Coordinates may be given in
        # either signed 32-bit integer or 32-bit floating point form.
        # (Every "v2" variant is already skipped above, so this check
        # cannot trigger at the moment.)
        if vec == "v2" and dtype != "f16":
            continue

        name = "test_tex_{}{}{}{}{}".format(mipmap, geom, vec, dtype, ctype)
        mipmap_infix = "." + mipmap if mipmap else ""
        retty = get_llvm_vec_type(vec, dtype)
        access_ty = get_llvm_texture_access_type(geom, ctype, mipmap)
        intrinsic = "llvm.nvvm.tex{}.{}{}.{}{}.{}".format(
            unified_infix, get_llvm_geom(geom), mipmap_infix, vec, dtype, ctype
        )
        instruction = "tex{}.{}.{}.{}.{}".format(
            mipmap_infix, geom, vec, dtype, ctype
        )

        # Key order matters: gen_test() lists the parameters in this
        # order when debugging is enabled.
        gen_test(
            template,
            {
                "test_name": name,
                "intrinsic": intrinsic,
                "global_tex": global_tex,
                "retty": retty,
                "sampler": sampler_arg,
                "access": get_llvm_texture_access(geom, ctype, mipmap),
                "get_sampler_handle": load_sampler,
                "instruction": instruction,
                "ptx_ret": get_ptx_vec_reg(vec, dtype),
                "ptx_tex": tex_operand,
                "ptx_access": get_ptx_texture_access(geom, ctype),
                "ptx_global_sampler": global_sampler_operand,
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the texture (and sampler) as parameters.
        annotations.extend(
            get_texture_metadata(
                target,
                "void (i64, {} {}*, {})*".format(
                    sampler_param_ty, retty, access_ty
                ),
                name + "_param",
                has_texture_params=True,
            )
        )
        # Kernel that reads the handles from the globals instead.
        annotations.extend(
            get_texture_metadata(
                target,
                "void ({}*, {})*".format(retty, access_ty),
                name + "_global",
                has_texture_params=False,
            )
        )

    return items, annotations
716
717
def get_llvm_tld4_access(geom):
    """
    Return the LLVM operand list used to address a texture in a tld4 test.

    Coordinates are `float`; array geometries list the layer index
    `i32 %l` first. Raises KeyError for an unknown geometry.

    From the PTX spec:

    For 2D textures, operand c specifies coordinates as a two-element,
    32-bit floating-point vector.

    For 2d texture arrays operand c is a four element, 32-bit
    vector. The first element in operand c is interpreted as an unsigned
    integer index (.u32) into the texture array, and the next two
    elements are interpreted as 32-bit floating point coordinates of 2d
    texture. The fourth element is ignored.

    For cubemap textures, operand c specifies four-element vector which
    comprises three floating-point coordinates (s, t, r) and a fourth
    padding argument which is ignored.

    [For cube arrays] The first element in operand c is interpreted as
    an unsigned integer index (.u32) into the cubemap texture array, and
    the remaining three elements are interpreted as floating-point
    cubemap coordinates (s, t, r), used to lookup in the selected
    cubemap.
    """
    coords_by_geom = {
        "2d": ("x", "y"),
        "a2d": ("x", "y"),
        "cube": ("s", "t", "r"),
        "acube": ("s", "t", "r"),
    }
    operands = ["float %" + name for name in coords_by_geom[geom]]
    # The array geometries are exactly the keys that start with "a".
    if geom.startswith("a"):
        operands.insert(0, "i32 %l")
    return ", ".join(operands)
746
747
def get_llvm_tld4_access_type(geom):
    """Return the operand types matching get_llvm_tld4_access().

    Raises KeyError for an unknown geometry.
    """
    coord_count = {"2d": 2, "a2d": 2, "cube": 3, "acube": 3}
    types = ["float"] * coord_count[geom]
    # The array geometries are exactly the keys that start with "a".
    if geom.startswith("a"):
        types.insert(0, "i32")
    return ", ".join(types)
756
757
def get_ptx_tld4_access(geom):
    """Return the FileCheck pattern for the tld4 coordinate operand.

    "2d" uses two f32 registers, "cube" four; the array geometries use a
    b32 index register followed by three f32 registers. Raises KeyError
    for an unknown geometry.
    """
    f32_reg = "%f{{[0-9]+}}"
    index_reg = "%r{{[0-9]+}}"
    regs_by_geom = {
        "2d": [f32_reg] * 2,
        "a2d": [index_reg] + [f32_reg] * 3,
        "cube": [f32_reg] * 4,
        "acube": [index_reg] + [f32_reg] * 3,
    }
    return "{" + ", ".join(regs_by_geom[geom]) + "}"
766
767
def gen_tld4_tests(target, global_tex, global_sampler):
    """
    Print two kernels for every generated tld4 variant: one taking the
    texture (and sampler) as parameters and one reading them from the
    globals `global_tex` / `global_sampler`.

    Returns (items, metadata). `items` has one
    (intrinsic name, PTX instruction) tuple per variant; `metadata` is the
    list of annotation strings collected from get_texture_metadata().

    PTX spec s9.7.9.4. Texture Instructions: tld4
    Perform a texture fetch of the 4-texel bilerp footprint.

    tld4.comp.2d.v4.dtype.f32    d[|p], [a, c] {, e} {, f};
    tld4.comp.geom.v4.dtype.f32  d[|p], [a, b, c] {, e} {, f};  // explicit sampler

    .comp  = { .r, .g, .b, .a };
    .geom  = { .2d, .a2d, .cube, .acube };
    .dtype = { .u32, .s32, .f32 };
    """

    template = """
  declare ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})

  ; CHECK-LABEL: .entry ${test_name}_param
  ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}]
  define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) {
    %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  ; CHECK-LABEL: .entry ${test_name}_global
  ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
  define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
    %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
    ${get_sampler_handle}
    %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access})
    store ${retty} %val, ${retty}* %ret
    ret void
  }
  """

    # Everything below depends only on the target, so compute it once.
    unified = is_unified(target)
    unified_infix = ".unified" if unified else ""
    sampler_param_ty = "" if unified else "i64,"
    sampler_arg, load_sampler = get_llvm_global_sampler(target, global_sampler)
    tex_operand = get_ptx_texture(target)
    global_sampler_operand = get_ptx_global_sampler(target, global_sampler)

    items = []
    annotations = []
    variants = product(
        ["r", "g", "b", "a"], ["2d", "a2d", "cube", "acube"], ["u32", "s32", "f32"]
    )
    for comp, geom, dtype in variants:
        # FIXME: missing intrinsics.
        # tld4.{a2d,cube,acube} introduced in PTX ISA version 4.3.
        if geom in ("a2d", "cube", "acube"):
            continue

        name = "test_tld4_{}{}{}".format(comp, geom, dtype)
        retty = get_llvm_vec_type("v4", dtype)
        access_ty = get_llvm_tld4_access_type(geom)
        intrinsic = "llvm.nvvm.tld4{}.{}.{}.v4{}.f32".format(
            unified_infix, comp, get_llvm_geom(geom), dtype
        )
        instruction = "tld4.{}.{}.v4.{}.f32".format(comp, geom, dtype)

        # Key order matters: gen_test() lists the parameters in this
        # order when debugging is enabled.
        gen_test(
            template,
            {
                "test_name": name,
                "intrinsic": intrinsic,
                "global_tex": global_tex,
                "retty": retty,
                "sampler": sampler_arg,
                "access": get_llvm_tld4_access(geom),
                "get_sampler_handle": load_sampler,
                "instruction": instruction,
                "ptx_ret": get_ptx_vec_reg("v4", dtype),
                "ptx_tex": tex_operand,
                "ptx_access": get_ptx_tld4_access(geom),
                "ptx_global_sampler": global_sampler_operand,
            },
        )
        items.append((intrinsic, instruction))

        # Kernel that receives the texture (and sampler) as parameters.
        annotations.extend(
            get_texture_metadata(
                target,
                "void (i64, {} {}*, {})*".format(
                    sampler_param_ty, retty, access_ty
                ),
                name + "_param",
                has_texture_params=True,
            )
        )
        # Kernel that reads the handles from the globals instead.
        annotations.extend(
            get_texture_metadata(
                target,
                "void ({}*, {})*".format(retty, access_ty),
                name + "_global",
                has_texture_params=False,
            )
        )

    return items, annotations
861
862
def gen_test(template, params):
    """
    Print one test rendered from `template`.

    The template text is dedented and then expanded with
    string.Template substitution, taking placeholder values from the
    `params` mapping. When the module-level `debug` flag is true, an
    empty line and one ";; name: value" line per parameter are printed
    ahead of the rendered text.
    """
    if debug:
        print()
        for name in params:
            print(";; {}: {}".format(name, params[name]))

    dedented = textwrap.dedent(template)
    rendered = string.Template(dedented).substitute(params)
    print(rendered)
870
871
def gen_tests(target, tests):
    """
    Generate the requested groups of tests for `target`.

    `tests` names the groups to emit ("suld", "sust", "tex", "tld4").
    Groups are always produced in that fixed order, after the triple
    and the globals, and the collected metadata is emitted last.

    Returns the items reported by the group generators that ran.
    """
    gen_triple(target)

    surf_name = "gsurf"
    tex_name = "gtex"
    sampler_name = "gsam"

    items = []
    metadata = list(gen_globals(target, surf_name, tex_name, sampler_name))

    # Each generator is wrapped in a lambda so that it is only looked up
    # and called when its group was actually requested.
    groups = (
        ("suld", lambda: gen_suld_tests(target, surf_name)),
        ("sust", lambda: gen_sust_tests(target, surf_name)),
        ("tex", lambda: gen_tex_tests(target, tex_name, sampler_name)),
        ("tld4", lambda: gen_tld4_tests(target, tex_name, sampler_name)),
    )
    for group, generate in groups:
        if group not in tests:
            continue
        group_items, group_md = generate()
        items.extend(group_items)
        metadata.extend(group_md)

    gen_metadata(metadata)
    return items
902
903
def write_gen_list(filename, append, items):
    """
    Save (intrinsic, instruction) pairs to `filename`, one per line.

    Every line holds the two names separated by a single space. The
    file is extended when `append` is true and overwritten otherwise.
    """
    mode = "a" if append else "w"
    with open(filename, mode) as out:
        out.writelines(
            "{} {}\n".format(intrinsic, instruction)
            for intrinsic, instruction in items
        )
908
909
def read_gen_list(filename):
    """
    Load a list file made of "<intrinsic> <instruction>" lines.

    Returns a tuple of two sets: all intrinsic names and all
    instruction names found in the file. Every line has to split into
    exactly two whitespace-separated fields.
    """
    with open(filename) as listing:
        rows = [entry.split() for entry in listing]

    seen_intrinsics = set()
    seen_instructions = set()
    for intrinsic, instruction in rows:
        seen_intrinsics.add(intrinsic)
        seen_instructions.add(instruction)
    return (seen_intrinsics, seen_instructions)
919
920
def read_td_list(filename, regex):
    """
    Collect names from a TableGen file.

    Every line of `filename` is searched with `regex`, and the first
    capture group of each matching line is added to the result.

    Returns the set of captured names.

    Raises RuntimeError when fewer than 30 distinct names were found,
    which most likely means the file layout changed and `regex` no
    longer matches it.
    """
    # Compile once instead of handing the pattern to re.search per line.
    pattern = re.compile(regex)

    td_list = set()
    with open(filename) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                td_list.add(match.group(1))

    # Arbitrary value - we should find quite a lot of instructions
    if len(td_list) < 30:
        # The count goes first and the file name second, matching the
        # placeholders in the message.
        raise RuntimeError(
            "found only {} instructions in {}".format(len(td_list), filename)
        )

    return td_list
936
937
def verify_inst_tablegen(path_td, gen_instr):
    """
    Verify that all instructions defined in NVPTXIntrinsics.td are
    tested.

    path_td: path of the TableGen file that is scanned for quoted
        suld/sust/tex/tld4 instruction names.
    gen_instr: names of the instructions covered by generated tests.
        The collection is not modified.

    Raises RuntimeError naming the first instruction (in sorted order)
    that is found in the TableGen file but is neither in `gen_instr`
    nor in the list of known gaps below.
    """

    td_instr = read_td_list(path_td, '"((suld|sust|tex|tld4)\\..*)"')

    # Names that are accepted as covered although no test is generated
    # for them.
    known_untested = {
        # FIXME: spec does not list any sust.p variants other than b32
        "sust.p.1d.b8.trap",
        "sust.p.1d.b16.trap",
        "sust.p.1d.v2.b8.trap",
        "sust.p.1d.v2.b16.trap",
        "sust.p.1d.v4.b8.trap",
        "sust.p.1d.v4.b16.trap",
        "sust.p.a1d.b8.trap",
        "sust.p.a1d.b16.trap",
        "sust.p.a1d.v2.b8.trap",
        "sust.p.a1d.v2.b16.trap",
        "sust.p.a1d.v4.b8.trap",
        "sust.p.a1d.v4.b16.trap",
        "sust.p.2d.b8.trap",
        "sust.p.2d.b16.trap",
        "sust.p.2d.v2.b8.trap",
        "sust.p.2d.v2.b16.trap",
        "sust.p.2d.v4.b8.trap",
        "sust.p.2d.v4.b16.trap",
        "sust.p.a2d.b8.trap",
        "sust.p.a2d.b16.trap",
        "sust.p.a2d.v2.b8.trap",
        "sust.p.a2d.v2.b16.trap",
        "sust.p.a2d.v4.b8.trap",
        "sust.p.a2d.v4.b16.trap",
        "sust.p.3d.b8.trap",
        "sust.p.3d.b16.trap",
        "sust.p.3d.v2.b8.trap",
        "sust.p.3d.v2.b16.trap",
        "sust.p.3d.v4.b8.trap",
        "sust.p.3d.v4.b16.trap",
        # FIXME: sust.p is also not supported for arrays
        "sust.p.a1d.b32.trap",
        "sust.p.a1d.v2.b32.trap",
        "sust.p.a1d.v4.b32.trap",
        "sust.p.a2d.b32.trap",
        "sust.p.a2d.v2.b32.trap",
        "sust.p.a2d.v4.b32.trap",
    }

    covered = set(gen_instr) | known_untested

    # Compare by membership. Walking the two sorted lists index by index
    # goes out of step as soon as `covered` holds a name that tablegen
    # does not, and then blames an instruction that is in fact tested.
    missing = sorted(set(td_instr) - covered)
    if missing:
        raise RuntimeError(
            "{} is present in tablegen, but not tested.\n".format(missing[0])
        )
998
999
def verify_llvm_tablegen(path_td, gen_intr):
    """
    Verify that all intrinsics defined in IntrinsicsNVVM.td are
    tested.

    path_td: path of the TableGen file that is scanned for quoted
        llvm.nvvm.{suld,sust,tex,tld4} intrinsic names.
    gen_intr: names of the intrinsics covered by generated tests. The
        collection is not modified.

    Raises RuntimeError naming the first intrinsic (in sorted order)
    that is found in the TableGen file but is neither in `gen_intr`
    nor in the list of known gaps below.
    """

    td_intr = read_td_list(path_td, '"(llvm\\.nvvm\\.(suld|sust|tex|tld4)\\..*)"')

    # Names that are accepted as covered although no test is generated
    # for them.
    known_untested = {
        # FIXME: spec does not list any sust.p variants other than b32
        "llvm.nvvm.sust.p.1d.i8.trap",
        "llvm.nvvm.sust.p.1d.i16.trap",
        "llvm.nvvm.sust.p.1d.v2i8.trap",
        "llvm.nvvm.sust.p.1d.v2i16.trap",
        "llvm.nvvm.sust.p.1d.v4i8.trap",
        "llvm.nvvm.sust.p.1d.v4i16.trap",
        "llvm.nvvm.sust.p.1d.array.i8.trap",
        "llvm.nvvm.sust.p.1d.array.i16.trap",
        "llvm.nvvm.sust.p.1d.array.v2i8.trap",
        "llvm.nvvm.sust.p.1d.array.v2i16.trap",
        "llvm.nvvm.sust.p.1d.array.v4i8.trap",
        "llvm.nvvm.sust.p.1d.array.v4i16.trap",
        "llvm.nvvm.sust.p.2d.i8.trap",
        "llvm.nvvm.sust.p.2d.i16.trap",
        "llvm.nvvm.sust.p.2d.v2i8.trap",
        "llvm.nvvm.sust.p.2d.v2i16.trap",
        "llvm.nvvm.sust.p.2d.v4i8.trap",
        "llvm.nvvm.sust.p.2d.v4i16.trap",
        "llvm.nvvm.sust.p.2d.array.i8.trap",
        "llvm.nvvm.sust.p.2d.array.i16.trap",
        "llvm.nvvm.sust.p.2d.array.v2i8.trap",
        "llvm.nvvm.sust.p.2d.array.v2i16.trap",
        "llvm.nvvm.sust.p.2d.array.v4i8.trap",
        "llvm.nvvm.sust.p.2d.array.v4i16.trap",
        "llvm.nvvm.sust.p.3d.i8.trap",
        "llvm.nvvm.sust.p.3d.i16.trap",
        "llvm.nvvm.sust.p.3d.v2i8.trap",
        "llvm.nvvm.sust.p.3d.v2i16.trap",
        "llvm.nvvm.sust.p.3d.v4i8.trap",
        "llvm.nvvm.sust.p.3d.v4i16.trap",
        # FIXME: sust.p is also not supported for arrays
        "llvm.nvvm.sust.p.1d.array.i32.trap",
        "llvm.nvvm.sust.p.1d.array.v2i32.trap",
        "llvm.nvvm.sust.p.1d.array.v4i32.trap",
        "llvm.nvvm.sust.p.2d.array.i32.trap",
        "llvm.nvvm.sust.p.2d.array.v2i32.trap",
        "llvm.nvvm.sust.p.2d.array.v4i32.trap",
    }

    covered = set(gen_intr) | known_untested

    # Compare by membership. Walking the two sorted lists index by index
    # goes out of step as soon as `covered` holds a name that tablegen
    # does not, and then blames an intrinsic that is in fact tested.
    missing = sorted(set(td_intr) - covered)
    if missing:
        raise RuntimeError(
            "{} is present in tablegen, but not tested.\n".format(missing[0])
        )
1060
1061
# Script driver: parse the command line, then either check the generated
# list against the TableGen files (--verify) or generate the tests.
parser = argparse.ArgumentParser()

# Test generation options. The attribute names for --gen-list and
# --gen-list-append are derived by argparse from the option strings.
parser.add_argument("--debug", action="store_true")
parser.add_argument("--tests")
parser.add_argument("--target")
parser.add_argument("--gen-list")
parser.add_argument("--gen-list-append", action="store_true")

# Verification options.
parser.add_argument("--verify", action="store_true")
parser.add_argument("--llvm-tablegen", dest="llvm_td")
parser.add_argument("--inst-tablegen", dest="inst_td")

args = parser.parse_args()

# Read by gen_test, so it has to be set before any test is generated.
debug = args.debug

if not args.verify:
    items = gen_tests(args.target, args.tests.split(","))
    if args.gen_list:
        write_gen_list(args.gen_list, args.gen_list_append, items)
else:
    intrinsics, instructions = read_gen_list(args.gen_list)
    verify_inst_tablegen(args.inst_td, instructions)
    verify_llvm_tablegen(args.llvm_td, intrinsics)
1083