xref: /llvm-project/llvm/test/CodeGen/NVPTX/lower-byval-args.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5
2; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
3; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
4; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-copy-byval-args -S | FileCheck %s --check-prefixes=COMMON,COPY
;
; The three opt invocations above check how byval kernel arguments are rewritten:
;   * prefix SM_60 - opt run with -nvptx-lower-args for -mcpu=sm_60
;   * prefix SM_70 - opt run with -nvptx-lower-args for -mcpu=sm_70
;   * prefix COPY  - opt run with -passes=nvptx-copy-byval-args for -mcpu=sm_70
; The COMMON prefix is passed to every FileCheck invocation.
; The assertions are autogenerated (see the first line); regenerate them with
; utils/update_test_checks.py instead of editing them by hand.
5source_filename = "<stdin>"
6target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
7target triple = "nvptx64-nvidia-cuda"
8
; Aggregate of two i32 fields used as the byval argument type by the kernels
; below (the expected local copies of it are 8 bytes long).
9%struct.S = type { i32, i32 }
10
; External declarations referenced by the test kernels. Some of them may only
; be used by kernels further down the file.
; @_Z6escapePv takes a ptr and @_Z6escapei takes an i32 (the names are the
; Itanium manglings of escape(void*) and escape(int)). Kernels call them to make
; a pointer or value escape the function.
11; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
12declare dso_local void @_Z6escapePv(ptr noundef) local_unnamed_addr #0
13
14; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
15declare dso_local void @_Z6escapei(i32 noundef) local_unnamed_addr #0
16
; Memory intrinsics on generic (addrspace 0) pointers with an i64 length.
17; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
18declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
19
20; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
21declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
22
23; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
24declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
25
; Test read_only: the kernel only loads one i32 from the start of the byval
; argument %s and stores it to %out.
;   * The SM_60 and SM_70 runs expect no local copy: the load reads directly
;     through an addrspace(101) cast of %s, and %out is cast to addrspace(1)
;     and back to a generic pointer before the store.
;   * The COPY run expects %s to be copied (8 bytes, align 4) into an alloca
;     via llvm.memcpy.p0.p101 and the load to read that copy; %out is unchanged.
26; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
27define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
28; SM_60-LABEL: define dso_local ptx_kernel void @read_only(
29; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
30; SM_60-NEXT:  [[ENTRY:.*:]]
31; SM_60-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
32; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
33; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
34; SM_60-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
35; SM_60-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
36; SM_60-NEXT:    ret void
37;
38; SM_70-LABEL: define dso_local ptx_kernel void @read_only(
39; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
40; SM_70-NEXT:  [[ENTRY:.*:]]
41; SM_70-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
42; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
43; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
44; SM_70-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
45; SM_70-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
46; SM_70-NEXT:    ret void
47;
48; COPY-LABEL: define dso_local ptx_kernel void @read_only(
49; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
50; COPY-NEXT:  [[ENTRY:.*:]]
51; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
52; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
53; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
54; COPY-NEXT:    [[I:%.*]] = load i32, ptr [[S1]], align 4
55; COPY-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
56; COPY-NEXT:    ret void
57;
58entry:
59  %i = load i32, ptr %s, align 4
60  store i32 %i, ptr %out, align 4
61  ret void
62}
63
; Test read_only_gep: like read_only, but the i32 is loaded through a GEP at
; byte offset 4 into the byval argument %s.
;   * The SM_60 and SM_70 runs expect the GEP to be re-based on the
;     addrspace(101) cast of %s (checked as `inbounds`, without `nuw`) and the
;     load to read from addrspace(101); no local copy is made.
;   * The COPY run expects an 8-byte copy of %s into an alloca, with the
;     original `inbounds nuw` GEP and the load operating on that copy.
64; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
65define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
66; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep(
67; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
68; SM_60-NEXT:  [[ENTRY:.*:]]
69; SM_60-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
70; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
71; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
72; SM_60-NEXT:    [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
73; SM_60-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
74; SM_60-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
75; SM_60-NEXT:    ret void
76;
77; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep(
78; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
79; SM_70-NEXT:  [[ENTRY:.*:]]
80; SM_70-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
81; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
82; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
83; SM_70-NEXT:    [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
84; SM_70-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
85; SM_70-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
86; SM_70-NEXT:    ret void
87;
88; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep(
89; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
90; COPY-NEXT:  [[ENTRY:.*:]]
91; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
92; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
93; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
94; COPY-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
95; COPY-NEXT:    [[I:%.*]] = load i32, ptr [[B]], align 4
96; COPY-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
97; COPY-NEXT:    ret void
98;
99entry:
100  %b = getelementptr inbounds nuw i8, ptr %s, i64 4
101  %i = load i32, ptr %b, align 4
102  store i32 %i, ptr %out, align 4
103  ret void
104}
105
; Test read_only_gep_asc: the input already contains an explicit addrspacecast
; of the GEP result (byte offset 4 into %s) to addrspace(101), and the load
; reads through that cast.
;   * The SM_60 and SM_70 runs expect the same output as for read_only_gep:
;     a GEP on the addrspace(101) cast of %s feeding the load directly, with no
;     separate cast of the GEP result and no local copy.
;   * The COPY run expects an 8-byte copy of %s into an alloca; the original
;     GEP and addrspacecast are kept and now operate on the copy.
106; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
107define dso_local ptx_kernel void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
108; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
109; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
110; SM_60-NEXT:  [[ENTRY:.*:]]
111; SM_60-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
112; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
113; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
114; SM_60-NEXT:    [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
115; SM_60-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
116; SM_60-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
117; SM_60-NEXT:    ret void
118;
119; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
120; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
121; SM_70-NEXT:  [[ENTRY:.*:]]
122; SM_70-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
123; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
124; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
125; SM_70-NEXT:    [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
126; SM_70-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
127; SM_70-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
128; SM_70-NEXT:    ret void
129;
130; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc(
131; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
132; COPY-NEXT:  [[ENTRY:.*:]]
133; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
134; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
135; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
136; COPY-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
137; COPY-NEXT:    [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
138; COPY-NEXT:    [[I:%.*]] = load i32, ptr addrspace(101) [[ASC]], align 4
139; COPY-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
140; COPY-NEXT:    ret void
141;
142entry:
143  %b = getelementptr inbounds nuw i8, ptr %s, i64 4
144  %asc = addrspacecast ptr %b to ptr addrspace(101)
145  %i = load i32, ptr addrspace(101) %asc, align 4
146  store i32 %i, ptr %out, align 4
147  ret void
148}
149
; Test read_only_gep_asc0: the GEP result (byte offset 4 into %s) is cast to
; addrspace(101) and then cast back to a generic pointer before the load.
; All three runs expect the fallback form: %s is copied (8 bytes, align 4)
; into an alloca and the original GEP plus both addrspacecasts are kept on top
; of that copy. The SM_60 and SM_70 runs additionally round-trip %out through
; addrspace(1) before the store.
150; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
151define dso_local ptx_kernel void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
152; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
153; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
154; SM_60-NEXT:  [[ENTRY:.*:]]
155; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
156; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
157; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
158; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
159; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
160; SM_60-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
161; SM_60-NEXT:    [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
162; SM_60-NEXT:    [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
163; SM_60-NEXT:    [[I:%.*]] = load i32, ptr [[ASC0]], align 4
164; SM_60-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
165; SM_60-NEXT:    ret void
166;
167; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
168; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
169; SM_70-NEXT:  [[ENTRY:.*:]]
170; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
171; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
172; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
173; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
174; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
175; SM_70-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
176; SM_70-NEXT:    [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
177; SM_70-NEXT:    [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
178; SM_70-NEXT:    [[I:%.*]] = load i32, ptr [[ASC0]], align 4
179; SM_70-NEXT:    store i32 [[I]], ptr [[OUT2]], align 4
180; SM_70-NEXT:    ret void
181;
182; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
183; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
184; COPY-NEXT:  [[ENTRY:.*:]]
185; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
186; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
187; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
188; COPY-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
189; COPY-NEXT:    [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
190; COPY-NEXT:    [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
191; COPY-NEXT:    [[I:%.*]] = load i32, ptr [[ASC0]], align 4
192; COPY-NEXT:    store i32 [[I]], ptr [[OUT]], align 4
193; COPY-NEXT:    ret void
194;
195entry:
196  %b = getelementptr inbounds nuw i8, ptr %s, i64 4
197  %asc = addrspacecast ptr %b to ptr addrspace(101)
198  %asc0 = addrspacecast ptr addrspace(101) %asc to ptr
199  %i = load i32, ptr %asc0, align 4
200  store i32 %i, ptr %out, align 4
201  ret void
202}
203
; Test escape_ptr: the byval argument %s itself is passed to the external
; function @_Z6escapePv, so the pointer leaves the kernel.
; All three runs expect %s to be copied (8 bytes, align 4) into an alloca and
; the call to receive the address of that copy. The SM_60 and SM_70 runs also
; expect the addrspace(1) round-trip casts of %out, although %out is not used
; by the kernel body.
204; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
205define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
206; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr(
207; SM_60-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
208; SM_60-NEXT:  [[ENTRY:.*:]]
209; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
210; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
211; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
212; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
213; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
214; SM_60-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[S3]])
215; SM_60-NEXT:    ret void
216;
217; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr(
218; SM_70-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
219; SM_70-NEXT:  [[ENTRY:.*:]]
220; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
221; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
222; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
223; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
224; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
225; SM_70-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[S3]])
226; SM_70-NEXT:    ret void
227;
228; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr(
229; COPY-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
230; COPY-NEXT:  [[ENTRY:.*:]]
231; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
232; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
233; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
234; COPY-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[S1]])
235; COPY-NEXT:    ret void
236;
237entry:
238  call void @_Z6escapePv(ptr noundef nonnull %s) #0
239  ret void
240}
241
; Test escape_ptr_gep: a pointer derived from the byval argument %s (GEP at
; byte offset 4) is passed to the external function @_Z6escapePv.
; All three runs expect an 8-byte, align-4 copy of %s into an alloca, with the
; GEP re-based on that copy and its result passed to the call. The SM_60 and
; SM_70 runs also expect the addrspace(1) round-trip casts of the unused %out.
242; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
243define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
244; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
245; SM_60-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
246; SM_60-NEXT:  [[ENTRY:.*:]]
247; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
248; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
249; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
250; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
251; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
252; SM_60-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
253; SM_60-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[B]])
254; SM_60-NEXT:    ret void
255;
256; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
257; SM_70-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
258; SM_70-NEXT:  [[ENTRY:.*:]]
259; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
260; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
261; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
262; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
263; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
264; SM_70-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
265; SM_70-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[B]])
266; SM_70-NEXT:    ret void
267;
268; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep(
269; COPY-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
270; COPY-NEXT:  [[ENTRY:.*:]]
271; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
272; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
273; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
274; COPY-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
275; COPY-NEXT:    call void @_Z6escapePv(ptr noundef nonnull [[B]])
276; COPY-NEXT:    ret void
277;
278entry:
279  %b = getelementptr inbounds nuw i8, ptr %s, i64 4
280  call void @_Z6escapePv(ptr noundef nonnull %b) #0
281  ret void
282}
283
; Test escape_ptr_store: the address of the byval argument %s is itself stored
; to memory through %out, so the pointer leaves the kernel.
; All three runs expect an 8-byte, align-4 copy of %s into an alloca and the
; address of that copy to be the stored value. In the SM_60 and SM_70 runs the
; store goes through the addrspace(1) round-trip cast of %out; in the COPY run
; it goes through %out directly.
284; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
285define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
286; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
287; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
288; SM_60-NEXT:  [[ENTRY:.*:]]
289; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
290; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
291; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
292; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
293; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
294; SM_60-NEXT:    store ptr [[S3]], ptr [[OUT2]], align 8
295; SM_60-NEXT:    ret void
296;
297; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
298; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
299; SM_70-NEXT:  [[ENTRY:.*:]]
300; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
301; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
302; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
303; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
304; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
305; SM_70-NEXT:    store ptr [[S3]], ptr [[OUT2]], align 8
306; SM_70-NEXT:    ret void
307;
308; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_store(
309; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
310; COPY-NEXT:  [[ENTRY:.*:]]
311; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
312; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
313; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
314; COPY-NEXT:    store ptr [[S1]], ptr [[OUT]], align 8
315; COPY-NEXT:    ret void
316;
317entry:
318  store ptr %s, ptr %out, align 8
319  ret void
320}
321
; Test escape_ptr_gep_store: a pointer derived from the byval argument %s (GEP
; at byte offset 4) is stored to memory through %out.
; All three runs expect an 8-byte, align-4 copy of %s into an alloca, with the
; GEP re-based on that copy and its result being the stored value. In the
; SM_60 and SM_70 runs the store goes through the addrspace(1) round-trip cast
; of %out; in the COPY run it goes through %out directly.
322; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
323define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
324; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
325; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
326; SM_60-NEXT:  [[ENTRY:.*:]]
327; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
328; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
329; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
330; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
331; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
332; SM_60-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
333; SM_60-NEXT:    store ptr [[B]], ptr [[OUT2]], align 8
334; SM_60-NEXT:    ret void
335;
336; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
337; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
338; SM_70-NEXT:  [[ENTRY:.*:]]
339; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
340; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
341; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
342; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
343; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
344; SM_70-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
345; SM_70-NEXT:    store ptr [[B]], ptr [[OUT2]], align 8
346; SM_70-NEXT:    ret void
347;
348; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store(
349; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
350; COPY-NEXT:  [[ENTRY:.*:]]
351; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
352; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
353; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
354; COPY-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
355; COPY-NEXT:    store ptr [[B]], ptr [[OUT]], align 8
356; COPY-NEXT:    ret void
357;
358entry:
359  %b = getelementptr inbounds nuw i8, ptr %s, i64 4
360  store ptr %b, ptr %out, align 8
361  ret void
362}
363
; Test escape_ptrtoint: the address of the byval argument %s is converted to
; an i64 with ptrtoint and that integer is stored through %out.
; All three runs expect an 8-byte, align-4 copy of %s into an alloca and the
; ptrtoint to be applied to the address of that copy. In the SM_60 and SM_70
; runs the store goes through the addrspace(1) round-trip cast of %out; in the
; COPY run it goes through %out directly.
364; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
365define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
366; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
367; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
368; SM_60-NEXT:  [[ENTRY:.*:]]
369; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
370; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
371; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
372; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
373; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
374; SM_60-NEXT:    [[I:%.*]] = ptrtoint ptr [[S3]] to i64
375; SM_60-NEXT:    store i64 [[I]], ptr [[OUT2]], align 8
376; SM_60-NEXT:    ret void
377;
378; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
379; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
380; SM_70-NEXT:  [[ENTRY:.*:]]
381; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
382; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
383; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
384; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
385; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
386; SM_70-NEXT:    [[I:%.*]] = ptrtoint ptr [[S3]] to i64
387; SM_70-NEXT:    store i64 [[I]], ptr [[OUT2]], align 8
388; SM_70-NEXT:    ret void
389;
390; COPY-LABEL: define dso_local ptx_kernel void @escape_ptrtoint(
391; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
392; COPY-NEXT:  [[ENTRY:.*:]]
393; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
394; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
395; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
396; COPY-NEXT:    [[I:%.*]] = ptrtoint ptr [[S1]] to i64
397; COPY-NEXT:    store i64 [[I]], ptr [[OUT]], align 8
398; COPY-NEXT:    ret void
399;
400entry:
401  %i = ptrtoint ptr %s to i64
402  store i64 %i, ptr %out, align 8
403  ret void
404}
405
; Test memcpy_from_param: the byval argument %s is only used as the source of
; an llvm.memcpy into %out (length 16, last operand `i1 true`).
;   * The SM_60 and SM_70 runs expect no local copy: the call is retargeted to
;     llvm.memcpy.p0.p101 reading from the addrspace(101) cast of %s, with the
;     same length and flag, and is checked without the `tail` marker.
;   * The COPY run expects %s to be copied (8 bytes, align 4) into an alloca
;     first; the original `tail call` memcpy then reads from that copy.
; NOTE(review): the memcpy length (16) is larger than the 8 bytes copied for
; %struct.S; presumably only the shape of the rewritten call matters here.
; Confirm before reusing this IR elsewhere.
406; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
407define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
408; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
409; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
410; SM_60-NEXT:  [[ENTRY:.*:]]
411; SM_60-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
412; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
413; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
414; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
415; SM_60-NEXT:    ret void
416;
417; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
418; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
419; SM_70-NEXT:  [[ENTRY:.*:]]
420; SM_70-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
421; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
422; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
423; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
424; SM_70-NEXT:    ret void
425;
426; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
427; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
428; COPY-NEXT:  [[ENTRY:.*:]]
429; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
430; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
431; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
432; COPY-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[OUT]], ptr [[S1]], i64 16, i1 true)
433; COPY-NEXT:    ret void
434;
435entry:
436  tail call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %s, i64 16, i1 true)
437  ret void
438}
439
; Test memcpy_from_param_noalign: same body as memcpy_from_param, but the
; byval argument %s carries no explicit `align` attribute in the input.
;   * The SM_60 and SM_70 runs expect the output signature to carry `align 4`
;     on the byval parameter, and the memcpy to read directly from the
;     addrspace(101) cast of %s without a local copy.
;   * The COPY run expects the signature to stay without an `align` attribute
;     and the local alloca and its 8-byte initializing memcpy to use align 8;
;     the original `tail call` memcpy then reads from that copy.
440; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
441define dso_local ptx_kernel void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 {
442; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
443; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
444; SM_60-NEXT:  [[ENTRY:.*:]]
445; SM_60-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
446; SM_60-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
447; SM_60-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
448; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
449; SM_60-NEXT:    ret void
450;
451; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
452; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
453; SM_70-NEXT:  [[ENTRY:.*:]]
454; SM_70-NEXT:    [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
455; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
456; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
457; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true)
458; SM_70-NEXT:    ret void
459;
460; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign(
461; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
462; COPY-NEXT:  [[ENTRY:.*:]]
463; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 8
464; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
465; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[S1]], ptr addrspace(101) align 8 [[S2]], i64 8, i1 false)
466; COPY-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[OUT]], ptr [[S1]], i64 16, i1 true)
467; COPY-NEXT:    ret void
468;
469entry:
470  tail call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %s, i64 16, i1 true)
471  ret void
472}
473
; Test case: a kernel that uses its byval parameter %s as the DESTINATION of a
; memcpy (16 bytes read from %in, with the call's final i1 argument set to true).
; All three check prefixes below expect the same core shape: %s is first copied
; into a local alloca of %struct.S (align 4) by an 8-byte memcpy from
; addrspace(101), and the original memcpy is redirected to write into that alloca.
; The SM_60 and SM_70 expectations additionally round-trip %in through
; addrspace(1); the COPY expectations use %in unchanged.
; NOTE(review): the copy length (16) is larger than the 8 bytes the checks copy
; for %struct.S -- presumably deliberate for this test; confirm before changing.
474; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
475define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
476; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
477; SM_60-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef readnone byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
478; SM_60-NEXT:  [[ENTRY:.*:]]
479; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
480; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
481; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
482; SM_60-NEXT:    [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
483; SM_60-NEXT:    [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
484; SM_60-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
485; SM_60-NEXT:    ret void
486;
487; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
488; SM_70-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef readnone byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
489; SM_70-NEXT:  [[ENTRY:.*:]]
490; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
491; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
492; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
493; SM_70-NEXT:    [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
494; SM_70-NEXT:    [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
495; SM_70-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
496; SM_70-NEXT:    ret void
497;
498; COPY-LABEL: define dso_local ptx_kernel void @memcpy_to_param(
499; COPY-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef readnone byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
500; COPY-NEXT:  [[ENTRY:.*:]]
501; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
502; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
503; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
504; COPY-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true)
505; COPY-NEXT:    ret void
506;
; Input IR under test: the byval pointer %s is the memcpy destination, %in the source.
507entry:
508  tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true)
509  ret void
510}
511
; Test case: a kernel that stores directly into its byval parameter %s (an i32
; loaded from %in is written to %s). The i1 parameter %b is declared but is not
; used anywhere in the body.
; All three check prefixes below expect %s to be copied into a local alloca of
; %struct.S (align 4) through an 8-byte memcpy from addrspace(101), with the
; store retargeted to that alloca.
; The SM_60 and SM_70 expectations also round-trip %in through addrspace(1)
; before the load; the COPY expectations load from %in unchanged.
512; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
513define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 {
514; SM_60-LABEL: define dso_local ptx_kernel void @copy_on_store(
515; SM_60-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
516; SM_60-NEXT:  [[BB:.*:]]
517; SM_60-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
518; SM_60-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
519; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
520; SM_60-NEXT:    [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
521; SM_60-NEXT:    [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
522; SM_60-NEXT:    [[I:%.*]] = load i32, ptr [[IN2]], align 4
523; SM_60-NEXT:    store i32 [[I]], ptr [[S3]], align 4
524; SM_60-NEXT:    ret void
525;
526; SM_70-LABEL: define dso_local ptx_kernel void @copy_on_store(
527; SM_70-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
528; SM_70-NEXT:  [[BB:.*:]]
529; SM_70-NEXT:    [[S3:%.*]] = alloca [[STRUCT_S]], align 4
530; SM_70-NEXT:    [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
531; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
532; SM_70-NEXT:    [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
533; SM_70-NEXT:    [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
534; SM_70-NEXT:    [[I:%.*]] = load i32, ptr [[IN2]], align 4
535; SM_70-NEXT:    store i32 [[I]], ptr [[S3]], align 4
536; SM_70-NEXT:    ret void
537;
538; COPY-LABEL: define dso_local ptx_kernel void @copy_on_store(
539; COPY-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
540; COPY-NEXT:  [[BB:.*:]]
541; COPY-NEXT:    [[S1:%.*]] = alloca [[STRUCT_S]], align 4
542; COPY-NEXT:    [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
543; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
544; COPY-NEXT:    [[I:%.*]] = load i32, ptr [[IN]], align 4
545; COPY-NEXT:    store i32 [[I]], ptr [[S1]], align 4
546; COPY-NEXT:    ret void
547;
; Input IR under test: a plain store whose pointer operand is the byval argument.
548bb:
549  %i = load i32, ptr %in, align 4
550  store i32 %i, ptr %s, align 4
551  ret void
552}
553
; Test case: a read-only use of two byval i32 parameters through a select. The
; selected pointer is loaded from and the value is stored to %out.
; %input1 carries an explicit "align 4"; %input2 has no explicit alignment.
; Expected output differs per check prefix:
;  - the SM_60 run and the COPY run expect each parameter to be copied into its
;    own i32 alloca (align 4) by a 4-byte memcpy from addrspace(101), with the
;    select choosing between the two allocas;
;  - the SM_70 run expects no alloca: each parameter is cast to addrspace(101)
;    and passed to @llvm.nvvm.ptr.param.to.gen, and the select chooses between
;    the two resulting pointers.
; The SM_60 and SM_70 expectations also round-trip %out through addrspace(1).
554define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
555; SM_60-LABEL: define ptx_kernel void @test_select(
556; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
557; SM_60-NEXT:  [[BB:.*:]]
558; SM_60-NEXT:    [[OUT7:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
559; SM_60-NEXT:    [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
560; SM_60-NEXT:    [[INPUT24:%.*]] = alloca i32, align 4
561; SM_60-NEXT:    [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
562; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
563; SM_60-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
564; SM_60-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
565; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
566; SM_60-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
567; SM_60-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
568; SM_60-NEXT:    store i32 [[VALLOADED]], ptr [[OUT8]], align 4
569; SM_60-NEXT:    ret void
570;
571; SM_70-LABEL: define ptx_kernel void @test_select(
572; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
573; SM_70-NEXT:  [[BB:.*:]]
574; SM_70-NEXT:    [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
575; SM_70-NEXT:    [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
576; SM_70-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
577; SM_70-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
578; SM_70-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
579; SM_70-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
580; SM_70-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
581; SM_70-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
582; SM_70-NEXT:    store i32 [[VALLOADED]], ptr [[OUT2]], align 4
583; SM_70-NEXT:    ret void
584;
585; COPY-LABEL: define ptx_kernel void @test_select(
586; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
587; COPY-NEXT:  [[BB:.*:]]
588; COPY-NEXT:    [[INPUT23:%.*]] = alloca i32, align 4
589; COPY-NEXT:    [[INPUT24:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
590; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
591; COPY-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
592; COPY-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
593; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
594; COPY-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
595; COPY-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
596; COPY-NEXT:    store i32 [[VALLOADED]], ptr [[OUT]], align 4
597; COPY-NEXT:    ret void
598;
; Input IR under test: the select result is only ever read through.
599bb:
600  %ptrnew = select i1 %cond, ptr %input1, ptr %input2
601  %valloaded = load i32, ptr %ptrnew, align 4
602  store i32 %valloaded, ptr %out, align 4
603  ret void
604}
605
; Test case: the writing counterpart of @test_select. The pointer selected
; between the two byval i32 parameters is stored to (constant 1) instead of
; loaded from. %out is declared but is not used in the body.
; All three check prefixes below expect both parameters to be copied into i32
; allocas (align 4) by 4-byte memcpys from addrspace(101), with the select and
; the store operating on those allocas -- including the SM_70 run, which in
; @test_select expected @llvm.nvvm.ptr.param.to.gen instead.
; The SM_60 and SM_70 expectations still contain the addrspace(1) round-trip of
; %out, although the resulting value is not referenced by any later check line.
606define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) {
607; SM_60-LABEL: define ptx_kernel void @test_select_write(
608; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
609; SM_60-NEXT:  [[BB:.*:]]
610; SM_60-NEXT:    [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
611; SM_60-NEXT:    [[OUT6:%.*]] = addrspacecast ptr addrspace(1) [[OUT5]] to ptr
612; SM_60-NEXT:    [[INPUT23:%.*]] = alloca i32, align 4
613; SM_60-NEXT:    [[INPUT24:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
614; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
615; SM_60-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
616; SM_60-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
617; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
618; SM_60-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
619; SM_60-NEXT:    store i32 1, ptr [[PTRNEW]], align 4
620; SM_60-NEXT:    ret void
621;
622; SM_70-LABEL: define ptx_kernel void @test_select_write(
623; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
624; SM_70-NEXT:  [[BB:.*:]]
625; SM_70-NEXT:    [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
626; SM_70-NEXT:    [[OUT6:%.*]] = addrspacecast ptr addrspace(1) [[OUT5]] to ptr
627; SM_70-NEXT:    [[INPUT23:%.*]] = alloca i32, align 4
628; SM_70-NEXT:    [[INPUT24:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
629; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
630; SM_70-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
631; SM_70-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
632; SM_70-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
633; SM_70-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
634; SM_70-NEXT:    store i32 1, ptr [[PTRNEW]], align 4
635; SM_70-NEXT:    ret void
636;
637; COPY-LABEL: define ptx_kernel void @test_select_write(
638; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
639; COPY-NEXT:  [[BB:.*:]]
640; COPY-NEXT:    [[INPUT23:%.*]] = alloca i32, align 4
641; COPY-NEXT:    [[INPUT24:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
642; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
643; COPY-NEXT:    [[INPUT11:%.*]] = alloca i32, align 4
644; COPY-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
645; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
646; COPY-NEXT:    [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
647; COPY-NEXT:    store i32 1, ptr [[PTRNEW]], align 4
648; COPY-NEXT:    ret void
649;
; Input IR under test: the select result is written through.
650bb:
651  %ptrnew = select i1 %cond, ptr %input1, ptr %input2
652  store i32 1, ptr %ptrnew, align 4
653  ret void
654}
655
; Test case: a read-only use of two byval %struct.S parameters through a phi.
; Block "first" takes the address of field 0 of %input1, block "second" takes
; the address of field 1 of %input2; "merge" joins them with a phi, loads an i32
; through it and stores the value to %inout.
; %input1 carries an explicit "align 4"; %input2 has no explicit alignment.
; Expected output differs per check prefix:
;  - the SM_60 run and the COPY run expect both parameters to be copied into
;    %struct.S allocas by 8-byte memcpys from addrspace(101) (align 8 for the
;    %input2 copy, align 4 for the %input1 copy), with the GEPs based on the
;    allocas;
;  - the SM_70 run expects no alloca: each parameter is cast to addrspace(101)
;    and passed to @llvm.nvvm.ptr.param.to.gen, with the GEPs based on the
;    resulting pointers.
; The SM_60 and SM_70 expectations also round-trip %inout through addrspace(1).
656define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) {
657; SM_60-LABEL: define ptx_kernel void @test_phi(
658; SM_60-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
659; SM_60-NEXT:  [[BB:.*:]]
660; SM_60-NEXT:    [[INOUT7:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
661; SM_60-NEXT:    [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr
662; SM_60-NEXT:    [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
663; SM_60-NEXT:    [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
664; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
665; SM_60-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
666; SM_60-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
667; SM_60-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
668; SM_60-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
669; SM_60:       [[FIRST]]:
670; SM_60-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
671; SM_60-NEXT:    br label %[[MERGE:.*]]
672; SM_60:       [[SECOND]]:
673; SM_60-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1
674; SM_60-NEXT:    br label %[[MERGE]]
675; SM_60:       [[MERGE]]:
676; SM_60-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
677; SM_60-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
678; SM_60-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT8]], align 4
679; SM_60-NEXT:    ret void
680;
681; SM_70-LABEL: define ptx_kernel void @test_phi(
682; SM_70-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
683; SM_70-NEXT:  [[BB:.*:]]
684; SM_70-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
685; SM_70-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
686; SM_70-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
687; SM_70-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
688; SM_70-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
689; SM_70-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
690; SM_70-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
691; SM_70:       [[FIRST]]:
692; SM_70-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0
693; SM_70-NEXT:    br label %[[MERGE:.*]]
694; SM_70:       [[SECOND]]:
695; SM_70-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT2_PARAM_GEN]], i32 0, i32 1
696; SM_70-NEXT:    br label %[[MERGE]]
697; SM_70:       [[MERGE]]:
698; SM_70-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
699; SM_70-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
700; SM_70-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
701; SM_70-NEXT:    ret void
702;
703; COPY-LABEL: define ptx_kernel void @test_phi(
704; COPY-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
705; COPY-NEXT:  [[BB:.*:]]
706; COPY-NEXT:    [[INPUT23:%.*]] = alloca [[STRUCT_S]], align 8
707; COPY-NEXT:    [[INPUT24:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
708; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false)
709; COPY-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
710; COPY-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
711; COPY-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
712; COPY-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
713; COPY:       [[FIRST]]:
714; COPY-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
715; COPY-NEXT:    br label %[[MERGE:.*]]
716; COPY:       [[SECOND]]:
717; COPY-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT23]], i32 0, i32 1
718; COPY-NEXT:    br label %[[MERGE]]
719; COPY:       [[MERGE]]:
720; COPY-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
721; COPY-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
722; COPY-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
723; COPY-NEXT:    ret void
724;
; Input IR under test: the phi result is only ever read through.
725bb:
726  br i1 %cond, label %first, label %second
727
728first:                                            ; preds = %bb
729  %ptr1 = getelementptr inbounds %struct.S, ptr %input1, i32 0, i32 0
730  br label %merge
731
732second:                                           ; preds = %bb
733  %ptr2 = getelementptr inbounds %struct.S, ptr %input2, i32 0, i32 1
734  br label %merge
735
736merge:                                            ; preds = %second, %first
737  %ptrnew = phi ptr [ %ptr1, %first ], [ %ptr2, %second ]
738  %valloaded = load i32, ptr %ptrnew, align 4
739  store i32 %valloaded, ptr %inout, align 4
740  ret void
741}
742
; Test case: the writing counterpart of @test_phi. The pointer produced by the
; phi over field 0 of %input1 and field 1 of %input2 is stored to (constant 1)
; instead of loaded from. There is no output pointer parameter here.
; A single shared check prefix (COMMON) is used, so every RUN line is expected
; to produce the same output: both parameters copied into %struct.S allocas by
; 8-byte memcpys from addrspace(101) (align 8 for the %input2 copy, align 4 for
; the %input1 copy), with the GEPs, the phi and the store operating on those
; allocas.
743define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) {
744; COMMON-LABEL: define ptx_kernel void @test_phi_write(
745; COMMON-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
746; COMMON-NEXT:  [[BB:.*:]]
747; COMMON-NEXT:    [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
748; COMMON-NEXT:    [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
749; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
750; COMMON-NEXT:    [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
751; COMMON-NEXT:    [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
752; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
753; COMMON-NEXT:    br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
754; COMMON:       [[FIRST]]:
755; COMMON-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
756; COMMON-NEXT:    br label %[[MERGE:.*]]
757; COMMON:       [[SECOND]]:
758; COMMON-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1
759; COMMON-NEXT:    br label %[[MERGE]]
760; COMMON:       [[MERGE]]:
761; COMMON-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
762; COMMON-NEXT:    store i32 1, ptr [[PTRNEW]], align 4
763; COMMON-NEXT:    ret void
764;
; Input IR under test: the phi result is written through.
765bb:
766  br i1 %cond, label %first, label %second
767
768first:                                            ; preds = %bb
769  %ptr1 = getelementptr inbounds %struct.S, ptr %input1, i32 0, i32 0
770  br label %merge
771
772second:                                           ; preds = %bb
773  %ptr2 = getelementptr inbounds %struct.S, ptr %input2, i32 0, i32 1
774  br label %merge
775
776merge:                                            ; preds = %second, %first
777  %ptrnew = phi ptr [ %ptr1, %first ], [ %ptr2, %second ]
778  store i32 1, ptr %ptrnew, align 4
779  ret void
780}
781
; Attribute groups referenced by "#N" on the declarations and definitions in
; this file:
;   #0 - attached to the @_Z6escape* declarations and to the kernels that name
;        it explicitly; it also carries the per-function "target-cpu" and
;        "target-features" strings.
;   #1 - attached to the llvm.memcpy / llvm.memmove intrinsic declarations.
;   #2 - attached to the llvm.memset intrinsic declaration.
; NOTE(review): #0 names "+ptx78" while the RUN lines pass -mattr=ptx77, and it
; pins "target-cpu"="sm_60" while two RUN lines pass -mcpu=sm_70 -- confirm
; which setting the passes actually observe before relying on either.
782attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size"="true" }
783attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
784attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
785
; Module-level metadata: four module flags (!0-!3) and two ident strings
; (!20, !21).
786!llvm.module.flags = !{!0, !1, !2, !3}
787!llvm.ident = !{!20, !21}
788
789!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 8]}
790!1 = !{i32 1, !"wchar_size", i32 4}
791!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
792!3 = !{i32 7, !"frame-pointer", i32 2}
793!20 = !{!"clang version 20.0.0git"}
794!21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
794!21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
795