xref: /llvm-project/llvm/test/CodeGen/NVPTX/async-copy.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1*b279f6b0SFangrui Song; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s
2*b279f6b0SFangrui Song; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s
3*b279f6b0SFangrui Song; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4*b279f6b0SFangrui Song; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
502c24688SStuart Adams
602c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.wait.group(i32)
702c24688SStuart Adams
86963c61fSArtem Belevich; CHECK-LABEL: asyncwaitgroup
902c24688SStuart Adamsdefine void @asyncwaitgroup() {
106963c61fSArtem Belevich  ; CHECK: cp.async.wait_group 8;
1102c24688SStuart Adams  tail call void @llvm.nvvm.cp.async.wait.group(i32 8)
126963c61fSArtem Belevich  ; CHECK: cp.async.wait_group 0;
1302c24688SStuart Adams  tail call void @llvm.nvvm.cp.async.wait.group(i32 0)
146963c61fSArtem Belevich  ; CHECK: cp.async.wait_group 16;
1502c24688SStuart Adams  tail call void @llvm.nvvm.cp.async.wait.group(i32 16)
1602c24688SStuart Adams  ret void
1702c24688SStuart Adams}
1802c24688SStuart Adams
1902c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.wait.all()
2002c24688SStuart Adams
216963c61fSArtem Belevich; CHECK-LABEL: asyncwaitall
2202c24688SStuart Adamsdefine void @asyncwaitall() {
236963c61fSArtem Belevich; CHECK: cp.async.wait_all
2402c24688SStuart Adams  tail call void @llvm.nvvm.cp.async.wait.all()
2502c24688SStuart Adams  ret void
2602c24688SStuart Adams}
2702c24688SStuart Adams
2802c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.commit.group()
2902c24688SStuart Adams
306963c61fSArtem Belevich; CHECK-LABEL: asynccommitgroup
3102c24688SStuart Adamsdefine void @asynccommitgroup() {
326963c61fSArtem Belevich; CHECK: cp.async.commit_group
3302c24688SStuart Adams  tail call void @llvm.nvvm.cp.async.commit.group()
3402c24688SStuart Adams  ret void
3502c24688SStuart Adams}
3602c24688SStuart Adams
379b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
389b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
399b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
409b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
4102c24688SStuart Adams
4202c24688SStuart Adams; CHECK-LABEL: asyncmbarrier
439b81548aSNikita Popovdefine void @asyncmbarrier(ptr %a) {
446963c61fSArtem Belevich; The distinction between PTX32/PTX64 here is only to capture pointer register type
456963c61fSArtem Belevich; in R to be used in subsequent tests.
466963c61fSArtem Belevich; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%[[R:r]]{{[0-9]+}}];
476963c61fSArtem Belevich; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%[[R:rd]]{{[0-9]+}}];
489b81548aSNikita Popov  tail call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
4902c24688SStuart Adams  ret void
5002c24688SStuart Adams}
5102c24688SStuart Adams
5202c24688SStuart Adams; CHECK-LABEL: asyncmbarriershared
539b81548aSNikita Popovdefine void @asyncmbarriershared(ptr addrspace(3) %a) {
546963c61fSArtem Belevich; CHECK: cp.async.mbarrier.arrive.shared.b64 [%[[R]]{{[0-9]+}}];
559b81548aSNikita Popov  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
5602c24688SStuart Adams  ret void
5702c24688SStuart Adams}
5802c24688SStuart Adams
5902c24688SStuart Adams; CHECK-LABEL: asyncmbarriernoinc
609b81548aSNikita Popovdefine void @asyncmbarriernoinc(ptr %a) {
616963c61fSArtem Belevich; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%[[R]]{{[0-9]+}}];
629b81548aSNikita Popov  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
6302c24688SStuart Adams  ret void
6402c24688SStuart Adams}
6502c24688SStuart Adams
6602c24688SStuart Adams; CHECK-LABEL: asyncmbarriernoincshared
679b81548aSNikita Popovdefine void @asyncmbarriernoincshared(ptr addrspace(3) %a) {
686963c61fSArtem Belevich; CHECK: cp.async.mbarrier.arrive.noinc.shared.b64 [%[[R]]{{[0-9]+}}];
699b81548aSNikita Popov  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
7002c24688SStuart Adams  ret void
7102c24688SStuart Adams}
7202c24688SStuart Adams
730e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
746963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
7502c24688SStuart Adams
7602c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal4i8
776963c61fSArtem Belevichdefine void @asynccasharedglobal4i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
786963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4;
796963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, %r{{[0-9]+}};
806963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, 1;
810e43eb24SArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
826963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
836963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
8402c24688SStuart Adams  ret void
8502c24688SStuart Adams}
8602c24688SStuart Adams
870e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
886963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
8902c24688SStuart Adams
9002c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal8i8
916963c61fSArtem Belevichdefine void @asynccasharedglobal8i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
926963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8;
936963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, %r{{[0-9]+}};
946963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, 1;
950e43eb24SArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
966963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
976963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
9802c24688SStuart Adams  ret void
9902c24688SStuart Adams}
10002c24688SStuart Adams
1010e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
1026963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
10302c24688SStuart Adams
10402c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal16i8
1056963c61fSArtem Belevichdefine void @asynccasharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
1066963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16;
1076963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}};
1086963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1;
1090e43eb24SArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
1106963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
1116963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
11202c24688SStuart Adams  ret void
11302c24688SStuart Adams}
11402c24688SStuart Adams
1150e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
1166963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
11702c24688SStuart Adams
11802c24688SStuart Adams; CHECK-LABEL: asynccgsharedglobal16i8
1196963c61fSArtem Belevichdefine void @asynccgsharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
1206963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16;
1216963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}};
1226963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1;
1230e43eb24SArtem Belevich  tail call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
1246963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
1256963c61fSArtem Belevich  tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
12602c24688SStuart Adams  ret void
12702c24688SStuart Adams}
128