1*b279f6b0SFangrui Song; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s 2*b279f6b0SFangrui Song; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s 3*b279f6b0SFangrui Song; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %} 4*b279f6b0SFangrui Song; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %} 502c24688SStuart Adams 602c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.wait.group(i32) 702c24688SStuart Adams 86963c61fSArtem Belevich; CHECK-LABEL: asyncwaitgroup 902c24688SStuart Adamsdefine void @asyncwaitgroup() { 106963c61fSArtem Belevich ; CHECK: cp.async.wait_group 8; 1102c24688SStuart Adams tail call void @llvm.nvvm.cp.async.wait.group(i32 8) 126963c61fSArtem Belevich ; CHECK: cp.async.wait_group 0; 1302c24688SStuart Adams tail call void @llvm.nvvm.cp.async.wait.group(i32 0) 146963c61fSArtem Belevich ; CHECK: cp.async.wait_group 16; 1502c24688SStuart Adams tail call void @llvm.nvvm.cp.async.wait.group(i32 16) 1602c24688SStuart Adams ret void 1702c24688SStuart Adams} 1802c24688SStuart Adams 1902c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.wait.all() 2002c24688SStuart Adams 216963c61fSArtem Belevich; CHECK-LABEL: asyncwaitall 2202c24688SStuart Adamsdefine void @asyncwaitall() { 236963c61fSArtem Belevich; CHECK: cp.async.wait_all 2402c24688SStuart Adams tail call void @llvm.nvvm.cp.async.wait.all() 2502c24688SStuart Adams ret void 2602c24688SStuart Adams} 2702c24688SStuart Adams 2802c24688SStuart Adamsdeclare void @llvm.nvvm.cp.async.commit.group() 2902c24688SStuart Adams 306963c61fSArtem Belevich; CHECK-LABEL: asynccommitgroup 3102c24688SStuart Adamsdefine void @asynccommitgroup() { 326963c61fSArtem Belevich; CHECK: cp.async.commit_group 3302c24688SStuart Adams tail call void @llvm.nvvm.cp.async.commit.group() 3402c24688SStuart Adams ret void 3502c24688SStuart Adams} 3602c24688SStuart Adams 379b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a) 389b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a) 399b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a) 409b81548aSNikita Popovdeclare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a) 4102c24688SStuart Adams 4202c24688SStuart Adams; CHECK-LABEL: asyncmbarrier 439b81548aSNikita Popovdefine void @asyncmbarrier(ptr %a) { 446963c61fSArtem Belevich; The distinction between PTX32/PTX64 here is only to capture pointer register type 456963c61fSArtem Belevich; in R to be used in subsequent tests. 466963c61fSArtem Belevich; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%[[R:r]]{{[0-9]+}}]; 476963c61fSArtem Belevich; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%[[R:rd]]{{[0-9]+}}]; 489b81548aSNikita Popov tail call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a) 4902c24688SStuart Adams ret void 5002c24688SStuart Adams} 5102c24688SStuart Adams 5202c24688SStuart Adams; CHECK-LABEL: asyncmbarriershared 539b81548aSNikita Popovdefine void @asyncmbarriershared(ptr addrspace(3) %a) { 546963c61fSArtem Belevich; CHECK: cp.async.mbarrier.arrive.shared.b64 [%[[R]]{{[0-9]+}}]; 559b81548aSNikita Popov tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a) 5602c24688SStuart Adams ret void 5702c24688SStuart Adams} 5802c24688SStuart Adams 5902c24688SStuart Adams; CHECK-LABEL: asyncmbarriernoinc 609b81548aSNikita Popovdefine void @asyncmbarriernoinc(ptr %a) { 616963c61fSArtem Belevich; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%[[R]]{{[0-9]+}}]; 629b81548aSNikita Popov tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a) 6302c24688SStuart Adams ret void 6402c24688SStuart Adams} 6502c24688SStuart Adams 6602c24688SStuart Adams; CHECK-LABEL: asyncmbarriernoincshared 679b81548aSNikita Popovdefine void @asyncmbarriernoincshared(ptr addrspace(3) %a) { 686963c61fSArtem Belevich; CHECK: cp.async.mbarrier.arrive.noinc.shared.b64 [%[[R]]{{[0-9]+}}]; 699b81548aSNikita Popov tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a) 7002c24688SStuart Adams ret void 7102c24688SStuart Adams} 7202c24688SStuart Adams 730e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b) 746963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 7502c24688SStuart Adams 7602c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal4i8 776963c61fSArtem Belevichdefine void @asynccasharedglobal4i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { 786963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4; 796963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, %r{{[0-9]+}}; 806963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, 1; 810e43eb24SArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b) 826963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 836963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1) 8402c24688SStuart Adams ret void 8502c24688SStuart Adams} 8602c24688SStuart Adams 870e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b) 886963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 8902c24688SStuart Adams 9002c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal8i8 916963c61fSArtem Belevichdefine void @asynccasharedglobal8i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { 926963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8; 936963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, %r{{[0-9]+}}; 946963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, 1; 950e43eb24SArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b) 966963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 976963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1) 9802c24688SStuart Adams ret void 9902c24688SStuart Adams} 10002c24688SStuart Adams 1010e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b) 1026963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 10302c24688SStuart Adams 10402c24688SStuart Adams; CHECK-LABEL: asynccasharedglobal16i8 1056963c61fSArtem Belevichdefine void @asynccasharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { 1066963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16; 1076963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}}; 1086963c61fSArtem Belevich; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1; 1090e43eb24SArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b) 1106963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 1116963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1) 11202c24688SStuart Adams ret void 11302c24688SStuart Adams} 11402c24688SStuart Adams 1150e43eb24SArtem Belevichdeclare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b) 1166963c61fSArtem Belevichdeclare void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 11702c24688SStuart Adams 11802c24688SStuart Adams; CHECK-LABEL: asynccgsharedglobal16i8 1196963c61fSArtem Belevichdefine void @asynccgsharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) { 1206963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16; 1216963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}}; 1226963c61fSArtem Belevich; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1; 1230e43eb24SArtem Belevich tail call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b) 1246963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) 1256963c61fSArtem Belevich tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1) 12602c24688SStuart Adams ret void 12702c24688SStuart Adams} 128