; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK_PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK_PTX64
; RUN: %if ptxas-11.0 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}

; Checks the PTX emitted for the llvm.nvvm.mbarrier.* intrinsics when compiling
; for sm_80 with both the nvptx and nvptx64 triples.
;
; Every intrinsic that takes a barrier pointer is exercised twice: once with a
; plain `ptr` argument and once with a `ptr addrspace(3)` argument, which is
; expected to select the `.shared` form of the instruction.
;
; The address operand is matched as a %r register on nvptx and as a %rd
; register on nvptx64, so the instruction checks are split per target. The
; per-function label checks are shared by both FileCheck invocations; each
; label ends in `(` so that a function whose name is a prefix of another
; function's name (for example barrierinit / barrierinitshared) cannot match
; the wrong definition.

; --- mbarrier.init ----------------------------------------------------------

declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
declare void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)

; CHECK-LABEL: barrierinit(
define void @barrierinit(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierinitshared(
define void @barrierinitshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.shared.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.shared.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
  tail call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)
  ret void
}

; --- mbarrier.inval ---------------------------------------------------------

declare void @llvm.nvvm.mbarrier.inval(ptr %a)
declare void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)

; CHECK-LABEL: barrierinval(
define void @barrierinval(ptr %a) {
; CHECK_PTX32: mbarrier.inval.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval(ptr %a)
  ret void
}

; CHECK-LABEL: barrierinvalshared(
define void @barrierinvalshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.inval.shared.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.inval.shared.b64 [%rd{{[0-9]+}}];
  tail call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)
  ret void
}

; --- mbarrier.arrive --------------------------------------------------------
; The i64 result of each arrive-style intrinsic is deliberately left unused;
; the checks still expect the instruction, with a %rd destination, to be
; emitted.

declare i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
declare i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)

; CHECK-LABEL: barrierarrive(
define void @barrierarrive(ptr %a) {
; CHECK_PTX32: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
  ret void
}

; CHECK-LABEL: barrierarriveshared(
define void @barrierarriveshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)
  ret void
}

; --- mbarrier.arrive.noComplete ---------------------------------------------

declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)

; CHECK-LABEL: barrierarrivenoComplete(
define void @barrierarrivenoComplete(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierarrivenoCompleteshared(
define void @barrierarrivenoCompleteshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)
  ret void
}

; --- mbarrier.arrive.drop ---------------------------------------------------
; The intrinsic name uses `arrive.drop`; the expected PTX mnemonic is spelled
; `arrive_drop`.

declare i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)

; CHECK-LABEL: barrierarrivedrop(
define void @barrierarrivedrop(ptr %a) {
; CHECK_PTX32: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
  ret void
}

; CHECK-LABEL: barrierarrivedropshared(
define void @barrierarrivedropshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)
  ret void
}

; --- mbarrier.arrive.drop.noComplete ----------------------------------------

declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)

; CHECK-LABEL: barrierarrivedropnoComplete(
define void @barrierarrivedropnoComplete(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
  ret void
}

; CHECK-LABEL: barrierarrivedropnoCompleteshared(
define void @barrierarrivedropnoCompleteshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)
  ret void
}

; --- mbarrier.test.wait -----------------------------------------------------
; The i1 result is expected in a %p register and is left unused.

declare i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
declare i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)

; CHECK-LABEL: barriertestwait(
define void @barriertestwait(ptr %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
  ret void
}

; CHECK-LABEL: barriertestwaitshared(
define void @barriertestwaitshared(ptr addrspace(3) %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)
  ret void
}

; --- mbarrier.pending.count -------------------------------------------------
; This intrinsic takes only an i64 value (no pointer), so the expected
; instruction text is the same for both targets. The %a parameter of the test
; function is unused.

declare i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)

; CHECK-LABEL: barrierpendingcount(
define i32 @barrierpendingcount(ptr %a, i64 %b) {
; CHECK_PTX32: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
  %ret = tail call i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
  ret i32 %ret
}