; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
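;
; These tests exercise the "q" inline-asm constraint, which binds i128 values
; to PTX .b128 registers (%rq) when compiling for sm_70 with PTX 8.3.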

target triple = "nvptx64-nvidia-cuda"

@value = internal addrspace(1) global i128 0, align 16

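; A constant i128 input: the immediate is split into two .b64 halves and
; packed into a .b128 register before being passed to the asm's "q" operand.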
define void @test_b128_input_from_const() {
; CHECK-LABEL: test_b128_input_from_const(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-NEXT:    .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    mov.b64 %rd2, 0;
; CHECK-NEXT:    mov.b64 %rd3, 42;
; CHECK-NEXT:    mov.b128 %rq1, {%rd3, %rd2};
; CHECK-NEXT:    mov.u64 %rd4, value;
; CHECK-NEXT:    cvta.global.u64 %rd1, %rd4;
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    { st.b128 [%rd1], %rq1; }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    ret;
  tail call void asm sideeffect "{ st.b128 [$0], $1; }", "l,q"(ptr nonnull addrspacecast (ptr addrspace(1) @value to ptr), i128 42)
  ret void
}

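; An i128 input loaded from memory: the two 64-bit halves are loaded
; separately and packed into a .b128 register for the "q" operand.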
define void @test_b128_input_from_load(ptr nocapture readonly %data) {
; CHECK-LABEL: test_b128_input_from_load(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-NEXT:    .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd2, [test_b128_input_from_load_param_0];
; CHECK-NEXT:    cvta.to.global.u64 %rd3, %rd2;
; CHECK-NEXT:    ld.global.u64 %rd4, [%rd3+8];
; CHECK-NEXT:    ld.global.u64 %rd5, [%rd3];
; CHECK-NEXT:    mov.b128 %rq1, {%rd5, %rd4};
; CHECK-NEXT:    mov.u64 %rd6, value;
; CHECK-NEXT:    cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    { st.b128 [%rd1], %rq1; }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    ret;
  %1 = addrspacecast ptr %data to ptr addrspace(1)
  %2 = load <2 x i64>, ptr addrspace(1) %1, align 16
  %3 = bitcast <2 x i64> %2 to i128
  tail call void asm sideeffect "{ st.b128 [$0], $1; }", "l,q"(ptr nonnull addrspacecast (ptr addrspace(1) @value to ptr), i128 %3)
  ret void
}

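; An i128 input produced by a select: the low half is selected (24 vs. 42),
; the high half is a constant 0, and both are packed into a .b128 register.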
define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
; CHECK-LABEL: test_b128_input_from_select(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-NEXT:    .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd2, [test_b128_input_from_select_param_0];
; CHECK-NEXT:    cvta.to.global.u64 %rd3, %rd2;
; CHECK-NEXT:    ld.global.u8 %rs1, [%rd3];
; CHECK-NEXT:    setp.eq.s16 %p1, %rs1, 0;
; CHECK-NEXT:    selp.b64 %rd4, 24, 42, %p1;
; CHECK-NEXT:    mov.b64 %rd5, 0;
; CHECK-NEXT:    mov.b128 %rq1, {%rd4, %rd5};
; CHECK-NEXT:    mov.u64 %rd6, value;
; CHECK-NEXT:    cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    { st.b128 [%rd1], %rq1; }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    ret;
  %1 = addrspacecast ptr %flag to ptr addrspace(1)
  %2 = load i8, ptr addrspace(1) %1, align 1
  %3 = icmp eq i8 %2, 0
  %4 = select i1 %3, i128 24, i128 42
  tail call void asm sideeffect "{ st.b128 [$0], $1; }", "l,q"(ptr nonnull addrspacecast (ptr addrspace(1) @value to ptr), i128 %4)
  ret void
}

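; An i128 asm output ("=q"): the .b128 result is unpacked into two .b64
; halves, incremented as a 128-bit value with add.cc/addc.cc, and stored
; to @value.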
define void @test_store_b128_output() {
; CHECK-LABEL: test_store_b128_output(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-NEXT:    .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    { mov.b128 %rq1, 41; }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, %rq1;
; CHECK-NEXT:    add.cc.s64 %rd3, %rd1, 1;
; CHECK-NEXT:    addc.cc.s64 %rd4, %rd2, 0;
; CHECK-NEXT:    st.global.u64 [value+8], %rd4;
; CHECK-NEXT:    st.global.u64 [value], %rd3;
; CHECK-NEXT:    ret;
  %1 = tail call i128 asm "{ mov.b128 $0, 41; }", "=q"()
  %add = add nsw i128 %1, 1
  %2 = bitcast i128 %add to <2 x i64>
  store <2 x i64> %2, ptr addrspace(1) @value, align 16
  ret void
}

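; An i128 value used as both asm output and input ("=q,q"): the loaded value
; is packed into one .b128 register, copied through the asm, then unpacked,
; incremented, and stored to @value.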
define void @test_use_of_b128_output(ptr nocapture readonly %data) {
; CHECK-LABEL: test_use_of_b128_output(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<9>;
; CHECK-NEXT:    .reg .b128 %rq<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [test_use_of_b128_output_param_0];
; CHECK-NEXT:    cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT:    ld.global.u64 %rd3, [%rd2+8];
; CHECK-NEXT:    ld.global.u64 %rd4, [%rd2];
; CHECK-NEXT:    mov.b128 %rq2, {%rd4, %rd3};
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    { mov.b128 %rq1, %rq2; }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    mov.b128 {%rd5, %rd6}, %rq1;
; CHECK-NEXT:    add.cc.s64 %rd7, %rd5, 1;
; CHECK-NEXT:    addc.cc.s64 %rd8, %rd6, 0;
; CHECK-NEXT:    st.global.u64 [value], %rd7;
; CHECK-NEXT:    st.global.u64 [value+8], %rd8;
; CHECK-NEXT:    ret;
  %1 = addrspacecast ptr %data to ptr addrspace(1)
  %2 = load <2 x i64>, ptr addrspace(1) %1, align 16
  %3 = bitcast <2 x i64> %2 to i128
  %4 = tail call i128 asm "{ mov.b128 $0, $1; }", "=q,q"(i128 %3)
  %add = add nsw i128 %4, 1
  %5 = bitcast i128 %add to <2 x i64>
  store <2 x i64> %5, ptr addrspace(1) @value, align 16
  ret void
}