xref: /llvm-project/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu (revision 4490003a22658dcd12527029b2c8682b63d8a9d6)
1f616c3eeSSaiyedul Islam // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
2f616c3eeSSaiyedul Islam // RUN:   -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s
3f616c3eeSSaiyedul Islam 
4f616c3eeSSaiyedul Islam // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
5f616c3eeSSaiyedul Islam // RUN:   -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
6f616c3eeSSaiyedul Islam 
7f616c3eeSSaiyedul Islam // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
8500846d2SPierre van Houtryve // RUN:   -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
9500846d2SPierre van Houtryve 
10500846d2SPierre van Houtryve // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
11f616c3eeSSaiyedul Islam // RUN:   -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
12f616c3eeSSaiyedul Islam 
13f616c3eeSSaiyedul Islam // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
14f616c3eeSSaiyedul Islam // RUN:   %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\
15f616c3eeSSaiyedul Islam // RUN:   FileCheck -check-prefix=LINKED4 %s
16f616c3eeSSaiyedul Islam 
17f616c3eeSSaiyedul Islam // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
18f616c3eeSSaiyedul Islam // RUN:   %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
19f616c3eeSSaiyedul Islam // RUN:   FileCheck -check-prefix=LINKED5 %s
20f616c3eeSSaiyedul Islam 
21500846d2SPierre van Houtryve // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
22500846d2SPierre van Houtryve // RUN:   %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
23500846d2SPierre van Houtryve // RUN:   FileCheck -check-prefix=LINKED6 %s
24500846d2SPierre van Houtryve 
25f616c3eeSSaiyedul Islam #include "Inputs/cuda.h"
26f616c3eeSSaiyedul Islam 
2721861991SSaiyedul Islam // LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
28f616c3eeSSaiyedul Islam // LINKED4-LABEL: bar
2921861991SSaiyedul Islam // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
30f616c3eeSSaiyedul Islam // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
31f616c3eeSSaiyedul Islam // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
32f616c3eeSSaiyedul Islam // LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
33f616c3eeSSaiyedul Islam // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
34f616c3eeSSaiyedul Islam // LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
35f616c3eeSSaiyedul Islam // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
36f616c3eeSSaiyedul Islam // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
37f616c3eeSSaiyedul Islam 
3821861991SSaiyedul Islam // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
39f616c3eeSSaiyedul Islam // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
40f616c3eeSSaiyedul Islam // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
41f616c3eeSSaiyedul Islam // LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
42f616c3eeSSaiyedul Islam // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
43f616c3eeSSaiyedul Islam // LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
44f616c3eeSSaiyedul Islam // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
45f616c3eeSSaiyedul Islam // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
46f616c3eeSSaiyedul Islam 
4721861991SSaiyedul Islam // LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
48f616c3eeSSaiyedul Islam // LINKED4-NOT: icmp sge i32 %{{.*}}, 500
49f616c3eeSSaiyedul Islam // LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
50f616c3eeSSaiyedul Islam // LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
51f616c3eeSSaiyedul Islam // LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
52f616c3eeSSaiyedul Islam // LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
53f616c3eeSSaiyedul Islam // LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
54f616c3eeSSaiyedul Islam // LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
55*4490003aSEmma Pilkington // LINKED4: "amdhsa_code_object_version", i32 400
56f616c3eeSSaiyedul Islam 
5721861991SSaiyedul Islam // LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
58f616c3eeSSaiyedul Islam // LINKED5-LABEL: bar
5921861991SSaiyedul Islam // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
60f616c3eeSSaiyedul Islam // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
61f616c3eeSSaiyedul Islam // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
62f616c3eeSSaiyedul Islam // LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
63f616c3eeSSaiyedul Islam // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
64f616c3eeSSaiyedul Islam // LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
65f616c3eeSSaiyedul Islam // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
66f616c3eeSSaiyedul Islam // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
67f616c3eeSSaiyedul Islam 
6821861991SSaiyedul Islam // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
69f616c3eeSSaiyedul Islam // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
70f616c3eeSSaiyedul Islam // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
71f616c3eeSSaiyedul Islam // LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
72f616c3eeSSaiyedul Islam // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
73f616c3eeSSaiyedul Islam // LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
74f616c3eeSSaiyedul Islam // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
75f616c3eeSSaiyedul Islam // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
76f616c3eeSSaiyedul Islam 
7721861991SSaiyedul Islam // LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
78f616c3eeSSaiyedul Islam // LINKED5-NOT: icmp sge i32 %{{.*}}, 500
79f616c3eeSSaiyedul Islam // LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
80f616c3eeSSaiyedul Islam // LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
81f616c3eeSSaiyedul Islam // LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
82f616c3eeSSaiyedul Islam // LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
83f616c3eeSSaiyedul Islam // LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
84f616c3eeSSaiyedul Islam // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
85*4490003aSEmma Pilkington // LINKED5: "amdhsa_code_object_version", i32 500
86f616c3eeSSaiyedul Islam 
87500846d2SPierre van Houtryve // LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
88500846d2SPierre van Houtryve // LINKED6-LABEL: bar
89500846d2SPierre van Houtryve // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
90500846d2SPierre van Houtryve // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
91500846d2SPierre van Houtryve // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
92500846d2SPierre van Houtryve // LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
93500846d2SPierre van Houtryve // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
94500846d2SPierre van Houtryve // LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
95500846d2SPierre van Houtryve // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
96500846d2SPierre van Houtryve // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
97500846d2SPierre van Houtryve 
98500846d2SPierre van Houtryve // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
99500846d2SPierre van Houtryve // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
100500846d2SPierre van Houtryve // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
101500846d2SPierre van Houtryve // LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
102500846d2SPierre van Houtryve // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
103500846d2SPierre van Houtryve // LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
104500846d2SPierre van Houtryve // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
105500846d2SPierre van Houtryve // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
106500846d2SPierre van Houtryve 
107500846d2SPierre van Houtryve // LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
108500846d2SPierre van Houtryve // LINKED6-NOT: icmp sge i32 %{{.*}}, 500
109500846d2SPierre van Houtryve // LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
110500846d2SPierre van Houtryve // LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
111500846d2SPierre van Houtryve // LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
112500846d2SPierre van Houtryve // LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
113500846d2SPierre van Houtryve // LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
114500846d2SPierre van Houtryve // LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
115*4490003aSEmma Pilkington // LINKED6: "amdhsa_code_object_version", i32 600
116500846d2SPierre van Houtryve 
117f616c3eeSSaiyedul Islam #ifdef DEVICELIB
bar(int * x,int * y,int * z)118f616c3eeSSaiyedul Islam __device__ void bar(int *x, int *y, int *z)
119f616c3eeSSaiyedul Islam {
120f616c3eeSSaiyedul Islam   *x = __builtin_amdgcn_workgroup_size_x();
121f616c3eeSSaiyedul Islam   *y = __builtin_amdgcn_workgroup_size_y();
122f616c3eeSSaiyedul Islam   *z = __builtin_amdgcn_workgroup_size_z();
123f616c3eeSSaiyedul Islam }
124f616c3eeSSaiyedul Islam #endif
125f616c3eeSSaiyedul Islam 
126f616c3eeSSaiyedul Islam #ifdef USER
127f616c3eeSSaiyedul Islam __device__ void bar(int *x, int *y, int *z);
foo()128f616c3eeSSaiyedul Islam __device__ void foo()
129f616c3eeSSaiyedul Islam {
130f616c3eeSSaiyedul Islam   int *x, *y, *z;
131f616c3eeSSaiyedul Islam   bar(x, y, z);
132f616c3eeSSaiyedul Islam }
133f616c3eeSSaiyedul Islam #endif
134