xref: /llvm-project/llvm/test/CodeGen/NVPTX/addrspacecast.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64
; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}

; conv1: cast a global pointer (addrspace(1)) to the generic address space and
; load an i32 through the result.
; Expected PTX: one cvta.global whose width suffix follows the pointer size of
; the target triple, no cvt.u64.u32 widening for the global pointer, and a
; load without an address-space qualifier.
; ALL-LABEL: conv1
define i32 @conv1(ptr addrspace(1) %ptr) {
; G32: cvta.global.u32
; ALL-NOT: cvt.u64.u32
; G64: cvta.global.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(1) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; conv2: cast a shared pointer (addrspace(3)) to the generic address space and
; load an i32 through the result.
; Expected PTX: cvta.shared at the generic pointer width of the target. Only
; the --nvptx-short-ptr configuration is expected to widen the incoming
; pointer with cvt.u64.u32 ahead of the cvta; the plain nvptx64 configuration
; must not emit that conversion there. The load carries no address-space
; qualifier.
; ALL-LABEL: conv2
define i32 @conv2(ptr addrspace(3) %ptr) {
; CLS32: cvta.shared.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.shared.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(3) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; conv3: cast a const pointer (addrspace(4)) to the generic address space and
; load an i32 through the result.
; Expected PTX: cvta.const at the generic pointer width of the target. Only
; the --nvptx-short-ptr configuration is expected to widen the incoming
; pointer with cvt.u64.u32 ahead of the cvta; the plain nvptx64 configuration
; must not emit that conversion there. The load carries no address-space
; qualifier.
; ALL-LABEL: conv3
define i32 @conv3(ptr addrspace(4) %ptr) {
; CLS32: cvta.const.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.const.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(4) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; conv4: cast a local pointer (addrspace(5)) to the generic address space and
; load an i32 through the result.
; Expected PTX: cvta.local at the generic pointer width of the target. Only
; the --nvptx-short-ptr configuration is expected to widen the incoming
; pointer with cvt.u64.u32 ahead of the cvta; the plain nvptx64 configuration
; must not emit that conversion there. The load carries no address-space
; qualifier.
; ALL-LABEL: conv4
define i32 @conv4(ptr addrspace(5) %ptr) {
; CLS32: cvta.local.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.local.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(5) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; conv5: cast a generic pointer to the global address space (addrspace(1)) and
; load an i32 through the result.
; Expected PTX: cvta.to.global at the pointer width of the target, no
; cvt.u64.u32 for the global pointer, and a load qualified with .global.
; The global-pointer checks use the G32/G64 prefixes, as conv1 does; the
; CLS32/CLS64 prefixes are otherwise used for the shared/const/local checks.
; ALL-LABEL: conv5
define i32 @conv5(ptr %ptr) {
; G32: cvta.to.global.u32
; ALL-NOT: cvt.u64.u32
; G64: cvta.to.global.u64
; ALL: ld.global.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(1)
  %val = load i32, ptr addrspace(1) %specptr
  ret i32 %val
}

; conv6: cast a generic pointer to the shared address space (addrspace(3)) and
; load an i32 through the result.
; Expected PTX: cvta.to.shared at the generic pointer width of the target.
; Only the --nvptx-short-ptr configuration is expected to narrow the result
; with cvt.u32.u64 after the cvta; the plain nvptx64 configuration must not
; emit that conversion there. The load is qualified with .shared.
; ALL-LABEL: conv6
define i32 @conv6(ptr %ptr) {
; CLS32: cvta.to.shared.u32
; CLS64: cvta.to.shared.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.shared.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(3)
  %val = load i32, ptr addrspace(3) %specptr
  ret i32 %val
}

; conv7: cast a generic pointer to the const address space (addrspace(4)) and
; load an i32 through the result.
; Expected PTX: cvta.to.const at the generic pointer width of the target.
; Only the --nvptx-short-ptr configuration is expected to narrow the result
; with cvt.u32.u64 after the cvta; the plain nvptx64 configuration must not
; emit that conversion there. The load is qualified with .const.
; ALL-LABEL: conv7
define i32 @conv7(ptr %ptr) {
; CLS32: cvta.to.const.u32
; CLS64: cvta.to.const.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.const.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(4)
  %val = load i32, ptr addrspace(4) %specptr
  ret i32 %val
}

; conv8: cast a generic pointer to the local address space (addrspace(5)) and
; load an i32 through the result.
; Expected PTX: cvta.to.local at the generic pointer width of the target.
; Only the --nvptx-short-ptr configuration is expected to narrow the result
; with cvt.u32.u64 after the cvta; the plain nvptx64 configuration must not
; emit that conversion there. The load is qualified with .local.
; ALL-LABEL: conv8
define i32 @conv8(ptr %ptr) {
; CLS32: cvta.to.local.u32
; CLS64: cvta.to.local.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.local.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(5)
  %val = load i32, ptr addrspace(5) %specptr
  ret i32 %val
}

; Check that we support addrspacecast when splitting the vector
; result (<2 x ptr> => 2 x <1 x ptr>).
; This also checks that scalarization works for addrspacecast
; (when going from <1 x ptr> to ptr).
; A <2 x ptr addrspace(1)> vector is loaded, cast lane-wise to generic
; pointers, and a float is stored through each lane. The checks expect one
; cvta.global per lane followed by two stores without an address-space
; qualifier. The global-pointer checks use the G32/G64 prefixes, as conv1
; does.
; ALL-LABEL: split1To0
define void @split1To0(ptr nocapture noundef readonly %xs) {
; G32: cvta.global.u32
; G32: cvta.global.u32
; G64: cvta.global.u64
; G64: cvta.global.u64
; ALL: st.u32
; ALL: st.u32
  %vec_addr = load <2 x ptr addrspace(1)>, ptr %xs, align 16
  %addrspacecast = addrspacecast <2 x ptr addrspace(1)> %vec_addr to <2 x ptr>
  %extractelement0 = extractelement <2 x ptr> %addrspacecast, i64 0
  store float 0.5, ptr %extractelement0, align 4
  %extractelement1 = extractelement <2 x ptr> %addrspacecast, i64 1
  store float 1.0, ptr %extractelement1, align 4
  ret void
}

; Same as split1To0 but from 0 to 1, to make sure the addrspacecast preserves
; the source and destination addrspaces properly.
; A <2 x ptr> vector of generic pointers is loaded, cast lane-wise to
; addrspace(1), and a float is stored through each lane. The checks expect one
; cvta.to.global per lane followed by two stores qualified with .global. The
; global-pointer checks use the G32/G64 prefixes, as conv1 does.
; ALL-LABEL: split0To1
define void @split0To1(ptr nocapture noundef readonly %xs) {
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32
; G64: cvta.to.global.u64
; G64: cvta.to.global.u64
; ALL: st.global.u32
; ALL: st.global.u32
  %vec_addr = load <2 x ptr>, ptr %xs, align 16
  %addrspacecast = addrspacecast <2 x ptr> %vec_addr to <2 x ptr addrspace(1)>
  %extractelement0 = extractelement <2 x ptr addrspace(1)> %addrspacecast, i64 0
  store float 0.5, ptr addrspace(1) %extractelement0, align 4
  %extractelement1 = extractelement <2 x ptr addrspace(1)> %addrspacecast, i64 1
  store float 1.0, ptr addrspace(1) %extractelement1, align 4
  ret void
}

; Check that we support addrspacecast when a widening is required
; (3 x ptr => 4 x ptr).
; A <3 x ptr addrspace(1)> vector is loaded, cast lane-wise to generic
; pointers, and a float is stored through each of the three lanes. The checks
; expect three cvta.global conversions followed by three stores without an
; address-space qualifier. The global-pointer checks use the G32/G64
; prefixes, as conv1 does.
; ALL-LABEL: widen1To0
define void @widen1To0(ptr nocapture noundef readonly %xs) {
; G32: cvta.global.u32
; G32: cvta.global.u32
; G32: cvta.global.u32

; G64: cvta.global.u64
; G64: cvta.global.u64
; G64: cvta.global.u64

; ALL: st.u32
; ALL: st.u32
; ALL: st.u32
  %vec_addr = load <3 x ptr addrspace(1)>, ptr %xs, align 16
  %addrspacecast = addrspacecast <3 x ptr addrspace(1)> %vec_addr to <3 x ptr>
  %extractelement0 = extractelement <3 x ptr> %addrspacecast, i64 0
  store float 0.5, ptr %extractelement0, align 4
  %extractelement1 = extractelement <3 x ptr> %addrspacecast, i64 1
  store float 1.0, ptr %extractelement1, align 4
  %extractelement2 = extractelement <3 x ptr> %addrspacecast, i64 2
  store float 1.5, ptr %extractelement2, align 4
  ret void
}

; Same as widen1To0 but from 0 to 1, to make sure the addrspacecast preserves
; the source and destination addrspaces properly.
; A <3 x ptr> vector of generic pointers is loaded, cast lane-wise to
; addrspace(1), and a float is stored through each of the three lanes. The
; checks expect three cvta.to.global conversions followed by three stores
; qualified with .global. The global-pointer checks use the G32/G64 prefixes,
; as conv1 does.
; ALL-LABEL: widen0To1
define void @widen0To1(ptr nocapture noundef readonly %xs) {
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32

; G64: cvta.to.global.u64
; G64: cvta.to.global.u64
; G64: cvta.to.global.u64

; ALL: st.global.u32
; ALL: st.global.u32
; ALL: st.global.u32
  %vec_addr = load <3 x ptr>, ptr %xs, align 16
  %addrspacecast = addrspacecast <3 x ptr> %vec_addr to <3 x ptr addrspace(1)>
  %extractelement0 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 0
  store float 0.5, ptr addrspace(1) %extractelement0, align 4
  %extractelement1 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 1
  store float 1.0, ptr addrspace(1) %extractelement1, align 4
  %extractelement2 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 2
  store float 1.5, ptr addrspace(1) %extractelement2, align 4
  ret void
}
