; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march nvptx64 --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}

; Exercises lowering of `cmpxchg` on i8, i16, i32 and i64 operands for the
; nvptx64 triple. Each function is checked under two prefixes: the prefix
; named SM30 is attached to the -mcpu=sm_32 invocation above, and the prefix
; named SM70 to the -mcpu=sm_70 -mattr=+ptx63 invocation. The expected PTX
; recorded in this file is the same under both prefixes.

; TODO: these are system scope, but are compiled to gpu scope..
; TODO: these are seq_cst, but are compiled to relaxed..
; NOTE(review): every cmpxchg in this file is written with `monotonic
; monotonic` orderings, while the TODO above describes the inputs as seq_cst.
; Either the TODO is stale or the orderings were meant to be seq_cst; confirm
; which one is intended before relying on either.

; 8-bit compare-and-swap. The cmpxchg result (%pairold) is never read; the
; function returns its %new argument. The expected PTX contains no 8-bit CAS:
; it rounds the address down to a 4-byte boundary (and.b64 with -4), derives a
; bit shift from the low two address bits, builds a shifted 255 mask, and then
; loops on a 32-bit atom.cas.b32 in the %partword.cmpxchg.loop block.
; CHECK-LABEL: relaxed_sys_i8
define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-LABEL: relaxed_sys_i8(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<21>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.u8 %rs1, [relaxed_sys_i8_param_2];
; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i8_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    cvt.u32.u64 %r9, %rd2;
; SM30-NEXT:    and.b32 %r10, %r9, 3;
; SM30-NEXT:    shl.b32 %r1, %r10, 3;
; SM30-NEXT:    mov.b32 %r11, 255;
; SM30-NEXT:    shl.b32 %r12, %r11, %r1;
; SM30-NEXT:    not.b32 %r2, %r12;
; SM30-NEXT:    cvt.u32.u16 %r13, %rs1;
; SM30-NEXT:    and.b32 %r14, %r13, 255;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    ld.param.u8 %r15, [relaxed_sys_i8_param_1];
; SM30-NEXT:    shl.b32 %r4, %r15, %r1;
; SM30-NEXT:    ld.u32 %r16, [%rd1];
; SM30-NEXT:    and.b32 %r20, %r16, %r2;
; SM30-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r17, %r20, %r3;
; SM30-NEXT:    or.b32 %r18, %r20, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
; SM30-NEXT:    @%p1 bra $L__BB0_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB0_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
; SM30-NEXT:    mov.u32 %r20, %r8;
; SM30-NEXT:    @%p2 bra $L__BB0_1;
; SM30-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r13;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i8(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<21>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.u8 %rs1, [relaxed_sys_i8_param_2];
; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i8_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    cvt.u32.u64 %r9, %rd2;
; SM70-NEXT:    and.b32 %r10, %r9, 3;
; SM70-NEXT:    shl.b32 %r1, %r10, 3;
; SM70-NEXT:    mov.b32 %r11, 255;
; SM70-NEXT:    shl.b32 %r12, %r11, %r1;
; SM70-NEXT:    not.b32 %r2, %r12;
; SM70-NEXT:    cvt.u32.u16 %r13, %rs1;
; SM70-NEXT:    and.b32 %r14, %r13, 255;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    ld.param.u8 %r15, [relaxed_sys_i8_param_1];
; SM70-NEXT:    shl.b32 %r4, %r15, %r1;
; SM70-NEXT:    ld.u32 %r16, [%rd1];
; SM70-NEXT:    and.b32 %r20, %r16, %r2;
; SM70-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r17, %r20, %r3;
; SM70-NEXT:    or.b32 %r18, %r20, %r4;
; SM70-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
; SM70-NEXT:    @%p1 bra $L__BB0_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
; SM70-NEXT:    mov.u32 %r20, %r8;
; SM70-NEXT:    @%p2 bra $L__BB0_1;
; SM70-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r13;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic
  ret i8 %new
}

; 16-bit compare-and-swap. As in the i8 case, %pairold is never read and the
; function returns %new. The expected PTX again rounds the address down to a
; 4-byte boundary and loops on a 32-bit atom.cas.b32, this time with a shifted
; 65535 mask.
; CHECK-LABEL: relaxed_sys_i16
define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-LABEL: relaxed_sys_i16(
; SM30:       {
; SM30-NEXT:    .reg .pred %p<3>;
; SM30-NEXT:    .reg .b16 %rs<2>;
; SM30-NEXT:    .reg .b32 %r<20>;
; SM30-NEXT:    .reg .b64 %rd<3>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.u16 %rs1, [relaxed_sys_i16_param_2];
; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i16_param_0];
; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
; SM30-NEXT:    ld.param.u16 %r9, [relaxed_sys_i16_param_1];
; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM30-NEXT:    and.b32 %r11, %r10, 3;
; SM30-NEXT:    shl.b32 %r1, %r11, 3;
; SM30-NEXT:    mov.b32 %r12, 65535;
; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
; SM30-NEXT:    not.b32 %r2, %r13;
; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
; SM30-NEXT:    ld.u32 %r15, [%rd1];
; SM30-NEXT:    and.b32 %r19, %r15, %r2;
; SM30-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
; SM30-NEXT:    or.b32 %r16, %r19, %r3;
; SM30-NEXT:    or.b32 %r17, %r19, %r4;
; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
; SM30-NEXT:    @%p1 bra $L__BB1_3;
; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT:    // in Loop: Header=BB1_1 Depth=1
; SM30-NEXT:    and.b32 %r8, %r7, %r2;
; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
; SM30-NEXT:    mov.u32 %r19, %r8;
; SM30-NEXT:    @%p2 bra $L__BB1_1;
; SM30-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i16(
; SM70:       {
; SM70-NEXT:    .reg .pred %p<3>;
; SM70-NEXT:    .reg .b16 %rs<2>;
; SM70-NEXT:    .reg .b32 %r<20>;
; SM70-NEXT:    .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.u16 %rs1, [relaxed_sys_i16_param_2];
; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i16_param_0];
; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
; SM70-NEXT:    ld.param.u16 %r9, [relaxed_sys_i16_param_1];
; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
; SM70-NEXT:    and.b32 %r11, %r10, 3;
; SM70-NEXT:    shl.b32 %r1, %r11, 3;
; SM70-NEXT:    mov.b32 %r12, 65535;
; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
; SM70-NEXT:    not.b32 %r2, %r13;
; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
; SM70-NEXT:    ld.u32 %r15, [%rd1];
; SM70-NEXT:    and.b32 %r19, %r15, %r2;
; SM70-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
; SM70-NEXT:    or.b32 %r16, %r19, %r3;
; SM70-NEXT:    or.b32 %r17, %r19, %r4;
; SM70-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
; SM70-NEXT:    @%p1 bra $L__BB1_3;
; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT:    // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT:    and.b32 %r8, %r7, %r2;
; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
; SM70-NEXT:    mov.u32 %r19, %r8;
; SM70-NEXT:    @%p2 bra $L__BB1_1;
; SM70-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic
  ret i16 %new
}

; 32-bit compare-and-swap. The expected PTX is a single atom.cas.b32 with no
; loop; %pairold is never read and the function returns %new.
; CHECK-LABEL: relaxed_sys_i32
define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM30-LABEL: relaxed_sys_i32(
; SM30:       {
; SM30-NEXT:    .reg .b32 %r<4>;
; SM30-NEXT:    .reg .b64 %rd<2>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i32_param_0];
; SM30-NEXT:    ld.param.u32 %r1, [relaxed_sys_i32_param_1];
; SM30-NEXT:    ld.param.u32 %r2, [relaxed_sys_i32_param_2];
; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i32(
; SM70:       {
; SM70-NEXT:    .reg .b32 %r<4>;
; SM70-NEXT:    .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i32_param_0];
; SM70-NEXT:    ld.param.u32 %r1, [relaxed_sys_i32_param_1];
; SM70-NEXT:    ld.param.u32 %r2, [relaxed_sys_i32_param_2];
; SM70-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic
  ret i32 %new
}

; 64-bit compare-and-swap. The expected PTX is a single atom.cas.b64 with no
; loop; %pairold is never read and the function returns %new.
; CHECK-LABEL: relaxed_sys_i64
define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM30-LABEL: relaxed_sys_i64(
; SM30:       {
; SM30-NEXT:    .reg .b64 %rd<5>;
; SM30-EMPTY:
; SM30-NEXT:  // %bb.0:
; SM30-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i64_param_0];
; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i64_param_1];
; SM30-NEXT:    ld.param.u64 %rd3, [relaxed_sys_i64_param_2];
; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM30-NEXT:    ret;
;
; SM70-LABEL: relaxed_sys_i64(
; SM70:       {
; SM70-NEXT:    .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT:  // %bb.0:
; SM70-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i64_param_0];
; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i64_param_1];
; SM70-NEXT:    ld.param.u64 %rd3, [relaxed_sys_i64_param_2];
; SM70-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
; SM70-NEXT:    ret;
  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic
  ret i64 %new
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}