1; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32 2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s 3; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} 4; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %} 5 6; CHECK-LABEL: .func test_atomics_scope( 7define void @test_atomics_scope(ptr %fp, float %f, 8 ptr %dfp, double %df, 9 ptr %ip, i32 %i, 10 ptr %uip, i32 %ui, 11 ptr %llp, i64 %ll) #0 { 12entry: 13; CHECK: atom.cta.add.s32 14 %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i) 15; CHECK: atom.cta.add.u64 16 %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 17; CHECK: atom.sys.add.s32 18 %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr %ip, i32 %i) 19; CHECK: atom.sys.add.u64 20 %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 21; CHECK: atom.cta.add.f32 22 %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f) 23; CHECK: atom.cta.add.f64 24 %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df) 25; CHECK: atom.sys.add.f32 26 %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr %fp, float %f) 27; CHECK: atom.sys.add.f64 28 %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr %dfp, double %df) 29 30; CHECK: atom.cta.exch.b32 31 %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr %ip, i32 %i) 32; CHECK: atom.cta.exch.b64 33 %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 34; CHECK: atom.sys.exch.b32 35 %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr %ip, i32 %i) 36; CHECK: atom.sys.exch.b64 37 %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 38 39; CHECK: atom.cta.max.s32 40 %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr %ip, i32 %i) 41; CHECK: atom.cta.max.s64 42 %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 43; CHECK: atom.sys.max.s32 44 %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr %ip, i32 %i) 45; CHECK: atom.sys.max.s64 46 %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 47 48; CHECK: atom.cta.min.s32 49 %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr %ip, i32 %i) 50; CHECK: atom.cta.min.s64 51 %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 52; CHECK: atom.sys.min.s32 53 %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr %ip, i32 %i) 54; CHECK: atom.sys.min.s64 55 %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 56 57; CHECK: atom.cta.inc.u32 58 %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr %ip, i32 %i) 59; CHECK: atom.sys.inc.u32 60 %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr %ip, i32 %i) 61 62; CHECK: atom.cta.dec.u32 63 %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr %ip, i32 %i) 64; CHECK: atom.sys.dec.u32 65 %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr %ip, i32 %i) 66 67; CHECK: atom.cta.and.b32 68 %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr %ip, i32 %i) 69; CHECK: atom.cta.and.b64 70 %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 71; CHECK: atom.sys.and.b32 72 %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr %ip, i32 %i) 73; CHECK: atom.sys.and.b64 74 %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 75 76; CHECK: atom.cta.or.b32 77 %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr %ip, i32 %i) 78; CHECK: atom.cta.or.b64 79 %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 80; CHECK: atom.sys.or.b32 81 %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr %ip, i32 %i) 82; CHECK: atom.sys.or.b64 83 %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 84 85; CHECK: atom.cta.xor.b32 86 %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr %ip, i32 %i) 87; CHECK: atom.cta.xor.b64 88 %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 89; CHECK: atom.sys.xor.b32 90 %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr %ip, i32 %i) 91; CHECK: atom.sys.xor.b64 92 %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr %llp, i64 %ll) 93 94; CHECK: atom.cta.cas.b32 95 %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i) 96; CHECK: atom.cta.cas.b64 97 %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr %llp, i64 %ll, i64 %ll) 98; CHECK: atom.sys.cas.b32 99 %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr %ip, i32 %i, i32 %i) 100; CHECK: atom.sys.cas.b64 101 %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr %llp, i64 %ll, i64 %ll) 102 103 ; CHECK: ret 104 ret void 105} 106 107; Make sure we use constants as operands to our scoped atomic calls, where appropriate. 108; CHECK-LABEL: .func test_atomics_scope_imm( 109define void @test_atomics_scope_imm(ptr %fp, float %f, 110 ptr %dfp, double %df, 111 ptr %ip, i32 %i, 112 ptr %uip, i32 %ui, 113 ptr %llp, i64 %ll) #0 { 114 115; CHECK: atom.cta.add.s32{{.*}} %r{{[0-9]+}}; 116 %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i) 117; CHECK: atom.cta.add.s32{{.*}}, 1; 118 %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 1) 119; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}}; 120 %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll) 121; CHECK: atom.cta.add.u64{{.*}}, 2; 122 %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 2) 123 124; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}}; 125 %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f) 126; CHECK: atom.cta.add.f32{{.*}}, 0f40400000; 127 %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float 3.0) 128; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}}; 129 %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df) 130; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000; 131 %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double 4.0) 132 133; CAS is implemented separately and has more arguments 134; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, %r{{[0-9+]}}; 135 %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i) 136; For some reason in 64-bit mode we end up passing 51 via a register. 137; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, 51; 138 %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 51) 139; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9+]}}; 140 %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 52, i32 %i) 141; CHECK: atom.cta.cas.b32{{.*}}], 53, 54; 142 %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 53, i32 54) 143 144 ; CHECK: ret 145 ret void 146} 147 148declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr nocapture, i32) #1 149declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr nocapture, i64) #1 150declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr nocapture, i32) #1 151declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr nocapture, i64) #1 152declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr nocapture, float) #1 153declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr nocapture, double) #1 154declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr nocapture, float) #1 155declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr nocapture, double) #1 156declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr nocapture, i32) #1 157declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr nocapture, i64) #1 158declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr nocapture, i32) #1 159declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr nocapture, i64) #1 160declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr nocapture, i32) #1 161declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr nocapture, i64) #1 162declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr nocapture, i32) #1 163declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr nocapture, i64) #1 164declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr nocapture, i32) #1 165declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr nocapture, i64) #1 166declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr nocapture, i32) #1 167declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr nocapture, i64) #1 168declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr nocapture, i32) #1 169declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr nocapture, i32) #1 170declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr nocapture, i32) #1 171declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr nocapture, i32) #1 172declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr nocapture, i32) #1 173declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr nocapture, i64) #1 174declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr nocapture, i32) #1 175declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr nocapture, i64) #1 176declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr nocapture, i32) #1 177declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr nocapture, i64) #1 178declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr nocapture, i32) #1 179declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr nocapture, i64) #1 180declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr nocapture, i32) #1 181declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr nocapture, i64) #1 182declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr nocapture, i32) #1 183declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr nocapture, i64) #1 184declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr nocapture, i32, i32) #1 185declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr nocapture, i64, i64) #1 186declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr nocapture, i32, i32) #1 187declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr nocapture, i64, i64) #1 188 189attributes #1 = { argmemonly nounwind } 190