xref: /llvm-project/llvm/test/CodeGen/NVPTX/store-undef.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
3; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
4
; Shared fixtures for every test function in this file.
5target triple = "nvptx64-nvidia-cuda"
6
; Aggregate used as the stored / passed value throughout. The assertions in
; this file access its three fields at byte offsets 0 (i64), 8 (<2 x i32>)
; and 16 (<4 x i32>), and size the matching call parameter as 32 bytes with
; 16-byte alignment.
7%struct.T = type { i64, <2 x i32>, <4 x i32> }
8
; External callee that takes the struct by value; it is only declared here so
; the call-argument lowering can be checked.
9declare void @test_call(%struct.T)
10
; Passes a fully-undef %struct.T by value to @test_call.
; The expected PTX still declares the 32-byte outgoing parameter and performs
; the call, but contains no st.param into that parameter and no register
; declarations (hence the two consecutive empty-line assertions after '{').
11define void @test_store_param_undef() {
12; CHECK-LABEL: test_store_param_undef(
13; CHECK:       {
14; CHECK-EMPTY:
15; CHECK-EMPTY:
16; CHECK-NEXT:  // %bb.0:
17; CHECK-NEXT:    { // callseq 0, 0
18; CHECK-NEXT:    .param .align 16 .b8 param0[32];
19; CHECK-NEXT:    call.uni
20; CHECK-NEXT:    test_call,
21; CHECK-NEXT:    (
22; CHECK-NEXT:    param0
23; CHECK-NEXT:    );
24; CHECK-NEXT:    } // callseq 0
25; CHECK-NEXT:    ret;
; The whole aggregate argument is undef.
26  call void @test_call(%struct.T undef)
27  ret void
28}
29
; Passes a partially-defined %struct.T by value to @test_call.
; Field 0 is %param0; both vector fields have only lane 1 set (to %param1)
; and every other lane left undef. Unlike the fully-undef case, the expected
; PTX emits all three st.param instructions. The undef lanes appear as
; registers (%r2, %r3, %r4, %r5) that no checked instruction writes.
30define void @test_store_param_def(i64 %param0, i32 %param1) {
31; CHECK-LABEL: test_store_param_def(
32; CHECK:       {
33; CHECK-NEXT:    .reg .b32 %r<6>;
34; CHECK-NEXT:    .reg .b64 %rd<2>;
35; CHECK-EMPTY:
36; CHECK-NEXT:  // %bb.0:
37; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_param_def_param_0];
38; CHECK-NEXT:    ld.param.u32 %r1, [test_store_param_def_param_1];
39; CHECK-NEXT:    { // callseq 1, 0
40; CHECK-NEXT:    .param .align 16 .b8 param0[32];
41; CHECK-NEXT:    st.param.b64 [param0], %rd1;
42; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r2, %r1};
43; CHECK-NEXT:    st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
44; CHECK-NEXT:    call.uni
45; CHECK-NEXT:    test_call,
46; CHECK-NEXT:    (
47; CHECK-NEXT:    param0
48; CHECK-NEXT:    );
49; CHECK-NEXT:    } // callseq 1
50; CHECK-NEXT:    ret;
; Build vectors whose only defined element is lane 1.
51  %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
52  %V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
; Assemble the struct field by field, starting from an undef aggregate.
53  %S0 = insertvalue %struct.T undef, i64 %param0, 0
54  %S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
55  %S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
56  call void @test_call(%struct.T %S2)
57  ret void
58}
59
; Non-volatile store of a fully-undef %struct.T to %out.
; The expected PTX body is just 'ret;': no store instruction, no parameter
; load and no register declarations are emitted.
60define void @test_store_undef(ptr %out) {
61; CHECK-LABEL: test_store_undef(
62; CHECK:       {
63; CHECK-EMPTY:
64; CHECK-EMPTY:
65; CHECK-NEXT:  // %bb.0:
66; CHECK-NEXT:    ret;
; Non-volatile store of an entirely undef value.
67  store %struct.T undef, ptr %out
68  ret void
69}
70
; Non-volatile store of a partially-defined %struct.T to %out.
; The struct is built the same way as in @test_store_param_def: field 0 is
; %param0 and only lane 1 of each vector field is defined (as %param1).
; The expected PTX keeps all three field stores, at offsets +16, +8 and +0,
; with the undef lanes shown as registers that no checked instruction writes.
71define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
72; CHECK-LABEL: test_store_def(
73; CHECK:       {
74; CHECK-NEXT:    .reg .b32 %r<6>;
75; CHECK-NEXT:    .reg .b64 %rd<3>;
76; CHECK-EMPTY:
77; CHECK-NEXT:  // %bb.0:
78; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_def_param_0];
79; CHECK-NEXT:    ld.param.u32 %r1, [test_store_def_param_1];
80; CHECK-NEXT:    ld.param.u64 %rd2, [test_store_def_param_2];
81; CHECK-NEXT:    st.v4.u32 [%rd2+16], {%r2, %r1, %r3, %r4};
82; CHECK-NEXT:    st.v2.u32 [%rd2+8], {%r5, %r1};
83; CHECK-NEXT:    st.u64 [%rd2], %rd1;
84; CHECK-NEXT:    ret;
; Build vectors whose only defined element is lane 1.
85  %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
86  %V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
; Assemble the struct field by field, starting from an undef aggregate.
87  %S0 = insertvalue %struct.T undef, i64 %param0, 0
88  %S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
89  %S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
90  store %struct.T %S2, ptr %out
91  ret void
92}
93
; Volatile stores that involve undef, either as the stored value or as the
; destination pointer. In contrast to @test_store_undef, the expected PTX
; contains a st.volatile for every one of them:
;   1. undef %struct.T to %out      -> v4.u32 @+16, v2.u32 @+8, u64 @+0
;   2. %vec to an undef pointer     -> two v4.u32 stores through %rd3 / %rd4,
;                                      address registers that no checked
;                                      instruction writes
;   3. undef <8 x i32> to %out      -> two v4.u32 stores @+16 and @+0
94define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) {
95; CHECK-LABEL: test_store_volatile_undef(
96; CHECK:       {
97; CHECK-NEXT:    .reg .b32 %r<23>;
98; CHECK-NEXT:    .reg .b64 %rd<5>;
99; CHECK-EMPTY:
100; CHECK-NEXT:  // %bb.0:
101; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_volatile_undef_param_0];
102; CHECK-NEXT:    st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
103; CHECK-NEXT:    st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
104; CHECK-NEXT:    st.volatile.u64 [%rd1], %rd2;
105; CHECK-NEXT:    ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1];
106; CHECK-NEXT:    ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16];
107; CHECK-NEXT:    st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14};
108; CHECK-NEXT:    st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10};
109; CHECK-NEXT:    st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18};
110; CHECK-NEXT:    st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22};
111; CHECK-NEXT:    ret;
; 1. undef value, defined address.
112  store volatile %struct.T undef, ptr %out
; 2. defined value, undef address.
113  store volatile <8 x i32> %vec, ptr undef
; 3. undef vector value, defined address.
114  store volatile <8 x i32> undef, ptr %out
115  ret void
116}
117
; Volatile store of a poison %struct.T to %out.
; The expected PTX emits all three volatile field stores (v4.u32 @+16,
; v2.u32 @+8, u64 @+0); the stored registers are not written by any checked
; instruction.
118define void @test_store_volatile_of_poison(ptr %out) {
119; CHECK-LABEL: test_store_volatile_of_poison(
120; CHECK:       {
121; CHECK-NEXT:    .reg .b32 %r<7>;
122; CHECK-NEXT:    .reg .b64 %rd<3>;
123; CHECK-EMPTY:
124; CHECK-NEXT:  // %bb.0:
125; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0];
126; CHECK-NEXT:    st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
127; CHECK-NEXT:    st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
128; CHECK-NEXT:    st.volatile.u64 [%rd1], %rd2;
129; CHECK-NEXT:    ret;
; Poison value, defined address.
130  store volatile %struct.T poison, ptr %out
131  ret void
132}
133
; Volatile store of a defined %struct.T (the by-value parameter) to a poison
; pointer. The expected PTX loads all three fields of the parameter and emits
; a volatile store for each, using address registers (%rd2, %rd3, %rd4) that
; no checked instruction writes.
134define void @test_store_volatile_to_poison(%struct.T %param) {
135; CHECK-LABEL: test_store_volatile_to_poison(
136; CHECK:       {
137; CHECK-NEXT:    .reg .b32 %r<7>;
138; CHECK-NEXT:    .reg .b64 %rd<5>;
139; CHECK-EMPTY:
140; CHECK-NEXT:  // %bb.0:
141; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0];
142; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8];
143; CHECK-NEXT:    ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16];
144; CHECK-NEXT:    st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6};
145; CHECK-NEXT:    st.volatile.v2.u32 [%rd3], {%r1, %r2};
146; CHECK-NEXT:    st.volatile.u64 [%rd4], %rd1;
147; CHECK-NEXT:    ret;
; Defined value, poison address.
148  store volatile %struct.T %param, ptr poison
149  ret void
150}
151