xref: /llvm-project/llvm/test/CodeGen/NVPTX/ctlz.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
2; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
3
4target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
5
6declare i16 @llvm.ctlz.i16(i16, i1) readnone
7declare i32 @llvm.ctlz.i32(i32, i1) readnone
8declare i64 @llvm.ctlz.i64(i64, i1) readnone
9
; There should be no difference between llvm.ctlz.i32(%a, true) and
; llvm.ctlz.i32(%a, false): ptx's clz is well-defined for a zero input (clz.b32
; of 0 returns 32), which is exactly the result LLVM requires when zero is not
; poison.
12
; CHECK-LABEL: myctlz(
define i32 @myctlz(i32 %a) {
  ; i32 leading-zero count; the second intrinsic argument is false here.
  %lz = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
  ret i32 %lz
; Expected PTX: parameter load, a single clz.b32, return-value store, ret,
; on consecutive lines.
; CHECK: ld.param.
; CHECK-NEXT: clz.b32
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
; CHECK-LABEL: myctlz_2(
define i32 @myctlz_2(i32 %a) {
  ; Same as @myctlz, but the second intrinsic argument is true.
  %lz = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
  ret i32 %lz
; Expected PTX is the same sequence as for the "false" variant.
; CHECK: ld.param.
; CHECK-NEXT: clz.b32
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
31
32; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
33; value, so here we have to zero-extend it.
; CHECK-LABEL: myctlz64(
define i64 @myctlz64(i64 %a) {
  ; i64 leading-zero count returned as i64; second intrinsic argument is false.
  %lz = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
  ret i64 %lz
; Expected PTX: clz.b64 immediately followed by a cvt.u64.u32 that widens the
; count before it is stored as the return value.
; CHECK: ld.param.
; CHECK-NEXT: clz.b64
; CHECK-NEXT: cvt.u64.u32
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
; CHECK-LABEL: myctlz64_2(
define i64 @myctlz64_2(i64 %a) {
  ; Same as @myctlz64, but the second intrinsic argument is true.
  %lz = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
  ret i64 %lz
; Expected PTX is the same sequence as for the "false" variant, including the
; widening cvt.u64.u32.
; CHECK: ld.param.
; CHECK-NEXT: clz.b64
; CHECK-NEXT: cvt.u64.u32
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
54
55; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
56; natural return width of ptx's clz.b64 instruction.  No conversions should be
57; necessary in the PTX.
; CHECK-LABEL: myctlz64_as_32(
define i32 @myctlz64_as_32(i64 %a) {
  ; Count on the full 64-bit input, then keep only the low 32 bits of the
  ; count.  The second intrinsic argument is false.
  %lz64 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
  %lz32 = trunc i64 %lz64 to i32
  ret i32 %lz32
; Expected PTX: clz.b64 is followed directly by the return-value store, with
; no cvt instruction in between.
; CHECK: ld.param.
; CHECK-NEXT: clz.b64
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
; Zero-is-poison counterpart of @myctlz64_as_32: the second intrinsic argument
; is true.  The expected PTX is the same (a bare clz.b64 with no conversion),
; just as @myctlz64_2 shares its expected output with @myctlz64.
; CHECK-LABEL: myctlz64_as_32_2(
define i32 @myctlz64_as_32_2(i64 %a) {
; CHECK: ld.param.
; CHECK-NEXT: clz.b64
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
  ; Pass "true" so this function differs from @myctlz64_as_32 and actually
  ; covers the truncated zero-is-poison case.
  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
  %trunc = trunc i64 %val to i32
  ret i32 %trunc
}
78
; ctlz.i16 is implemented by zero-extending the input to i32, computing the
; 32-bit result, subtracting 16 to discount the leading zeros introduced by the
; extension, and then truncating the result back down to i16.  But the NVPTX ABI
; zero-extends i16 return values to i32, so the final truncation doesn't appear
; in this function.
; CHECK-LABEL: myctlz_ret16(
define i16 @myctlz_ret16(i16 %a) {
  ; i16 leading-zero count returned directly; second intrinsic argument is
  ; false.
  %lz = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
  ret i16 %lz
; Expected PTX: widen the input with cvt.u32.u16, count with clz.b32, adjust
; with a sub, then store the return value; no narrowing cvt is expected.
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK-NEXT: sub.
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
; CHECK-LABEL: myctlz_ret16_2(
define i16 @myctlz_ret16_2(i16 %a) {
  ; Same as @myctlz_ret16, but the second intrinsic argument is true.
  %lz = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
  ret i16 %lz
; Expected PTX is the same sequence as for the "false" variant.
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK-NEXT: sub.
; CHECK-NEXT: st.param.
; CHECK-NEXT: ret;
}
105
; Here we store the result of ctlz.i16 to memory as an i16, so the trunc should
; remain.
; CHECK-LABEL: myctlz_store16(
define void @myctlz_store16(i16 %a, ptr %b) {
  ; i16 leading-zero count written through %b instead of being returned;
  ; second intrinsic argument is false.
  %lz = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
  store i16 %lz, ptr %b
  ret void
; Expected PTX: widen, clz.b32, then a narrowing cvt.u16.u32 and a sub in
; either order, followed by a 16-bit store.
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK-DAG: cvt.u16.u32
; CHECK-DAG: sub.
; CHECK: st.{{[a-z]}}16
; CHECK: ret;
}
; Zero-is-poison counterpart of @myctlz_store16: the second intrinsic argument
; is true.  The expected PTX is the same, just as @myctlz_ret16_2 shares its
; expected output with @myctlz_ret16.
; CHECK-LABEL: myctlz_store16_2(
define void @myctlz_store16_2(i16 %a, ptr %b) {
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK-DAG: cvt.u16.u32
; CHECK-DAG: sub.
; CHECK: st.{{[a-z]}}16
; CHECK: ret;
  ; Pass "true" so this function differs from @myctlz_store16 and actually
  ; covers the stored zero-is-poison case.
  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
  store i16 %val, ptr %b
  ret void
}
134