; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,BASE %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,GFX940 %s
; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,R600 %s

; Expansion of 16-bit atomics on global memory (addrspace 1) by the
; atomic-expand pass. Every expected output below operates on an i32 word:
; either a cmpxchg loop or a single widened atomicrmw.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; xchg without an explicit alignment: the pointer is masked down to a 4-byte
; boundary and the exchange becomes an i32 cmpxchg loop.
define i16 @test_atomicrmw_xchg_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; xchg with align 4: no pointer masking or shifting, the masks are constants.
define i16 @test_atomicrmw_xchg_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_agent_align4(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
  ret i16 %res
}

; add without an explicit alignment: i32 add in a cmpxchg loop, with the result
; masked back into the 16-bit lane.
define i16 @test_atomicrmw_add_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; add with align 4: same loop with constant masks and no shift.
define i16 @test_atomicrmw_add_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_agent_align4(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 65535
; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
  ret i16 %res
}

; sub without an explicit alignment: i32 sub in a cmpxchg loop.
define i16 @test_atomicrmw_sub_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_sub_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; and without an explicit alignment: widened to one i32 atomicrmw. The operand
; is or'ed with the inverted mask so bits outside the i16 lane are unchanged.
define i16 @test_atomicrmw_and_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; and with align 4: widened i32 atomicrmw with a constant high-half mask.
define i16 @test_atomicrmw_and_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
  ret i16 %res
}

; Drop unknown metadata and noundef
define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_drop_md(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !some.unknown.md !0
  ret i16 %res
}

; Drop unknown metadata and noundef
define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !some.unknown.md !0
  ret i16 %res
}

; Drop noundef, preserve mmra
define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_mmra(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !mmra !1
  ret i16 %res
}

; Drop noundef, preserve mmra
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !mmra !1
  ret i16 %res
}

; Preserve alias.scope
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !alias.scope [[META1:![0-9]+]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !alias.scope !2
  ret i16 %res
}

; Preserve noalias
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noalias [[META1]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noalias !2
  ret i16 %res
}

; Preserve tbaa.struct
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa.struct !5
  ret i16 %res
}

; Preserve tbaa
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa !6
  ret i16 %res
}

; Preserve amdgpu.no.remote.memory on the widened atomicrmw
define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8:![0-9]+]]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
  ret i16 %res
}

; Preserve amdgpu.no.remote.memory on the widened atomicrmw (align 4)
define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
  ret i16 %res
}

; Preserve amdgpu.no.fine.grained.memory on the widened atomicrmw
define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i16 %res
}

; Preserve amdgpu.no.fine.grained.memory on the widened atomicrmw (align 4)
define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
  ret i16 %res
}

; nand without an explicit alignment: and + xor -1 inside an i32 cmpxchg loop.
define i16 @test_atomicrmw_nand_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT:    [[NEW:%.*]] = xor i32 [[TMP5]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; or without an explicit alignment: widened to one i32 atomicrmw on the
; zero-extended, shifted operand.
define i16 @test_atomicrmw_or_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_or_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; xor without an explicit alignment: widened to one i32 atomicrmw on the
; zero-extended, shifted operand.
define i16 @test_atomicrmw_xor_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xor_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; Signed max: the i16 lane is extracted, compared/selected at i16 width, and
; re-inserted into the i32 word inside a cmpxchg loop.
define i16 @test_atomicrmw_max_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_max_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw max ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; Signed min: same extract/select/insert loop, using icmp sle.
define i16 @test_atomicrmw_min_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_min_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw min ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; Unsigned max: same extract/select/insert loop, using icmp ugt.
define i16 @test_atomicrmw_umax_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umax_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

; Unsigned min: same extract/select/insert loop, using icmp ule.
define i16 @test_atomicrmw_umin_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umin_i16_global_agent(
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
  ret i16 %res
}

define i16 @test_cmpxchg_i16_global_agent(ptr addrspace(1) %out, i16 %in, i16 %old) {
; CHECK-LABEL: @test_cmpxchg_i16_global_agent(
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
; CHECK-NEXT:    [[PTRLSB:%.*]] 
= and i64 [[TMP1]], 3 578; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 579; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 580; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 581; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 582; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32 583; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] 584; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32 585; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] 586; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 587; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] 588; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] 589; CHECK: partword.cmpxchg.loop: 590; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] 591; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] 592; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] 593; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4 594; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 595; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 596; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] 597; CHECK: partword.cmpxchg.failure: 598; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] 599; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] 600; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] 601; CHECK: partword.cmpxchg.end: 602; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] 603; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 604; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 605; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 606; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue 
{ i16, i1 } [[TMP18]], 0 607; CHECK-NEXT: ret i16 [[EXTRACT]] 608; 609 %gep = getelementptr i16, ptr addrspace(1) %out, i64 4 610 %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst 611 %extract = extractvalue {i16, i1} %res, 0 612 ret i16 %extract 613} 614 615define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in, i16 %old) { 616; CHECK-LABEL: @test_cmpxchg_i16_global_agent_align4( 617; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4 618; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32 619; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32 620; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4 621; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -65536 622; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] 623; CHECK: partword.cmpxchg.loop: 624; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] 625; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]] 626; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]] 627; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[GEP]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4 628; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0 629; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 630; CHECK-NEXT: br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] 631; CHECK: partword.cmpxchg.failure: 632; CHECK-NEXT: [[TMP11]] = and i32 [[TMP9]], -65536 633; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]] 634; CHECK-NEXT: br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] 635; CHECK: partword.cmpxchg.end: 636; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16 637; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 638; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1 639; CHECK-NEXT: 
[[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0 640; CHECK-NEXT: ret i16 [[EXTRACT]] 641; 642 %gep = getelementptr i16, ptr addrspace(1) %out, i64 4 643 %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst, align 4 644 %extract = extractvalue {i16, i1} %res, 0 645 ret i16 %extract 646} 647 648define i16 @test_atomicrmw_xchg_i16_local(ptr addrspace(3) %ptr, i16 %value) { 649; CHECK-LABEL: @test_atomicrmw_xchg_i16_local( 650; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) 651; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 652; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 653; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 654; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 655; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 656; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 657; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]] 658; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 659; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 660; CHECK: atomicrmw.start: 661; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 662; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 663; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] 664; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4 665; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 666; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 667; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 668; CHECK: atomicrmw.end: 669; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] 670; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 671; CHECK-NEXT: ret i16 [[EXTRACTED]] 672; 673 %res = atomicrmw 
xchg ptr addrspace(3) %ptr, i16 %value seq_cst 674 ret i16 %res 675} 676 677define i16 @test_cmpxchg_i16_local(ptr addrspace(3) %out, i16 %in, i16 %old) { 678; CHECK-LABEL: @test_cmpxchg_i16_local( 679; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(3) [[OUT:%.*]], i64 4 680; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[GEP]], i32 -4) 681; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32 682; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 683; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 684; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 685; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 686; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32 687; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[TMP2]] 688; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32 689; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[TMP2]] 690; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 691; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] 692; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] 693; CHECK: partword.cmpxchg.loop: 694; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] 695; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] 696; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] 697; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4 698; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 699; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 700; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] 701; CHECK: partword.cmpxchg.failure: 702; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] 703; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] 704; CHECK-NEXT: br i1 [[TMP16]], label 
[[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] 705; CHECK: partword.cmpxchg.end: 706; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[TMP2]] 707; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 708; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 709; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 710; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 711; CHECK-NEXT: ret i16 [[EXTRACT]] 712; 713 %gep = getelementptr i16, ptr addrspace(3) %out, i64 4 714 %res = cmpxchg ptr addrspace(3) %gep, i16 %old, i16 %in seq_cst seq_cst 715 %extract = extractvalue {i16, i1} %res, 0 716 ret i16 %extract 717} 718 719define i16 @test_atomicrmw_xor_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) { 720; CHECK-LABEL: @test_atomicrmw_xor_i16_local_align4( 721; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 722; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xor ptr addrspace(3) [[PTR:%.*]], i32 [[TMP1]] seq_cst, align 4 723; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 724; CHECK-NEXT: ret i16 [[EXTRACTED]] 725; 726 %res = atomicrmw xor ptr addrspace(3) %ptr, i16 %value seq_cst, align 4 727 ret i16 %res 728} 729 730define i16 @test_atomicrmw_inc_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) { 731; CHECK-LABEL: @test_atomicrmw_inc_i16_global_agent( 732; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) 733; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 734; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 735; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 736; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 737; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 738; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 739; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 740; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 
741; CHECK: atomicrmw.start: 742; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 743; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] 744; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 745; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 746; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 747; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] 748; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 749; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] 750; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 751; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 752; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 753; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 754; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 755; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 756; CHECK: atomicrmw.end: 757; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 758; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 759; CHECK-NEXT: ret i16 [[EXTRACTED3]] 760; 761 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst 762 ret i16 %res 763} 764 765define i16 @test_atomicrmw_inc_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) { 766; CHECK-LABEL: @test_atomicrmw_inc_i16_global_agent_align4( 767; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 768; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 769; CHECK: atomicrmw.start: 770; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 771; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 772; CHECK-NEXT: [[TMP2:%.*]] = add 
i16 [[EXTRACTED]], 1 773; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 774; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] 775; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 776; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 777; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 778; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 779; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 780; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 781; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 782; CHECK: atomicrmw.end: 783; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 784; CHECK-NEXT: ret i16 [[EXTRACTED1]] 785; 786 %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4 787 ret i16 %res 788} 789 790define i16 @test_atomicrmw_inc_i16_local(ptr addrspace(3) %ptr, i16 %value) { 791; CHECK-LABEL: @test_atomicrmw_inc_i16_local( 792; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) 793; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 794; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 795; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 796; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 797; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 798; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 799; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 800; CHECK: atomicrmw.start: 801; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 802; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] 803; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 804; CHECK-NEXT: [[TMP4:%.*]] = add i16 
[[EXTRACTED]], 1 805; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 806; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] 807; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 808; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] 809; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 810; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 811; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 812; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 813; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 814; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 815; CHECK: atomicrmw.end: 816; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] 817; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 818; CHECK-NEXT: ret i16 [[EXTRACTED3]] 819; 820 %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i16 %value syncscope("agent") seq_cst 821 ret i16 %res 822} 823 824define i16 @test_atomicrmw_inc_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) { 825; CHECK-LABEL: @test_atomicrmw_inc_i16_local_align4( 826; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 827; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 828; CHECK: atomicrmw.start: 829; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 830; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 831; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 832; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 833; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] 834; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 835; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 836; CHECK-NEXT: [[INSERTED:%.*]] = or i32 
[[UNMASKED]], [[EXTENDED]] 837; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 838; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 839; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 840; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 841; CHECK: atomicrmw.end: 842; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 843; CHECK-NEXT: ret i16 [[EXTRACTED1]] 844; 845 %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i16 %value syncscope("agent") seq_cst, align 4 846 ret i16 %res 847} 848 849define i16 @test_atomicrmw_inc_i16_flat_agent(ptr %ptr, i16 %value) { 850; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_agent( 851; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 852; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 853; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 854; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 855; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 856; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 857; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 858; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 859; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 860; CHECK: atomicrmw.start: 861; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 862; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] 863; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 864; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 865; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 866; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] 867; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 868; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] 869; CHECK-NEXT: 
[[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 870; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 871; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 872; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 873; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 874; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 875; CHECK: atomicrmw.end: 876; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 877; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 878; CHECK-NEXT: ret i16 [[EXTRACTED3]] 879; 880 %res = atomicrmw uinc_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst 881 ret i16 %res 882} 883 884define i16 @test_atomicrmw_inc_i16_flat_agent_align4(ptr %ptr, i16 %value) { 885; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_agent_align4( 886; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 887; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 888; CHECK: atomicrmw.start: 889; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 890; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 891; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 892; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] 893; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] 894; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 895; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 896; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 897; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 898; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 899; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 900; CHECK-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 901; CHECK: atomicrmw.end: 902; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 903; CHECK-NEXT: ret i16 [[EXTRACTED1]] 904; 905 %res = atomicrmw uinc_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst, align 4 906 ret i16 %res 907} 908 909define i16 @test_atomicrmw_dec_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) { 910; CHECK-LABEL: @test_atomicrmw_dec_i16_global_agent( 911; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) 912; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 913; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 914; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 915; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 916; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 917; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 918; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 919; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 920; CHECK: atomicrmw.start: 921; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 922; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] 923; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 924; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 925; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 926; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 927; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 928; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] 929; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 930; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] 931; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 932; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 933; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) 
[[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 934; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 935; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 936; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 937; CHECK: atomicrmw.end: 938; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 939; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 940; CHECK-NEXT: ret i16 [[EXTRACTED3]] 941; 942 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst 943 ret i16 %res 944} 945 946define i16 @test_atomicrmw_dec_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) { 947; CHECK-LABEL: @test_atomicrmw_dec_i16_global_agent_align4( 948; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 949; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 950; CHECK: atomicrmw.start: 951; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 952; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 953; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 954; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 955; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 956; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 957; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] 958; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 959; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 960; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 961; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 962; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 963; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 964; CHECK-NEXT: br i1 [[SUCCESS]], 
label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 965; CHECK: atomicrmw.end: 966; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 967; CHECK-NEXT: ret i16 [[EXTRACTED1]] 968; 969 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4 970 ret i16 %res 971} 972 973define i16 @test_atomicrmw_dec_i16_local(ptr addrspace(3) %ptr, i16 %value) { 974; CHECK-LABEL: @test_atomicrmw_dec_i16_local( 975; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) 976; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 977; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 978; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 979; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 980; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 981; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 982; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 983; CHECK: atomicrmw.start: 984; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 985; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] 986; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 987; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 988; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 989; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 990; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 991; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] 992; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 993; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] 994; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 995; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 996; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, 
align 4 997; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 998; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 999; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1000; CHECK: atomicrmw.end: 1001; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] 1002; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 1003; CHECK-NEXT: ret i16 [[EXTRACTED3]] 1004; 1005 %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i16 %value seq_cst 1006 ret i16 %res 1007} 1008 1009define i16 @test_atomicrmw_dec_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) { 1010; CHECK-LABEL: @test_atomicrmw_dec_i16_local_align4( 1011; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 1012; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1013; CHECK: atomicrmw.start: 1014; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1015; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 1016; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 1017; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 1018; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 1019; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 1020; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] 1021; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 1022; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 1023; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 1024; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 1025; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1026; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1027; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1028; CHECK: atomicrmw.end: 1029; CHECK-NEXT: 
[[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 1030; CHECK-NEXT: ret i16 [[EXTRACTED1]] 1031; 1032 %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i16 %value seq_cst, align 4 1033 ret i16 %res 1034} 1035 1036define i16 @test_atomicrmw_dec_i16_flat_agent(ptr %ptr, i16 %value) { 1037; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_agent( 1038; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 1039; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 1040; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 1041; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 1042; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 1043; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 1044; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1045; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 1046; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1047; CHECK: atomicrmw.start: 1048; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1049; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] 1050; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 1051; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 1052; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 1053; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 1054; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 1055; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] 1056; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 1057; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] 1058; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1059; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 1060; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 1061; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, 
i1 } [[TMP8]], 1 1062; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 1063; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1064; CHECK: atomicrmw.end: 1065; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 1066; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 1067; CHECK-NEXT: ret i16 [[EXTRACTED3]] 1068; 1069 %res = atomicrmw udec_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst 1070 ret i16 %res 1071} 1072 1073define i16 @test_atomicrmw_dec_i16_flat_agent_align4(ptr %ptr, i16 %value) { 1074; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_agent_align4( 1075; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 1076; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1077; CHECK: atomicrmw.start: 1078; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1079; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 1080; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 1081; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 1082; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 1083; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 1084; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] 1085; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 1086; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 1087; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 1088; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4 1089; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1090; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1091; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1092; CHECK: atomicrmw.end: 1093; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 1094; CHECK-NEXT: ret i16 
[[EXTRACTED1]] 1095; 1096 %res = atomicrmw udec_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst, align 4 1097 ret i16 %res 1098} 1099 1100define half @test_atomicrmw_xchg_f16_global_agent(ptr addrspace(1) %ptr, half %value) { 1101; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_agent( 1102; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) 1103; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 1104; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 1105; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 1106; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 1107; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 1108; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1109; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16 1110; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 1111; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 1112; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 1113; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1114; CHECK: atomicrmw.start: 1115; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1116; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1117; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 1118; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4 1119; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 1120; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 1121; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1122; CHECK: atomicrmw.end: 1123; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 1124; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 1125; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 
[[EXTRACTED]] to half 1126; CHECK-NEXT: ret half [[TMP9]] 1127; 1128 %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value syncscope("agent") seq_cst 1129 ret half %res 1130} 1131 1132define half @test_atomicrmw_xchg_f16_global_agent_align4(ptr addrspace(1) %ptr, half %value) { 1133; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_agent_align4( 1134; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16 1135; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 1136; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 1137; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1138; CHECK: atomicrmw.start: 1139; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1140; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 1141; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 1142; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4 1143; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1144; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1145; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1146; CHECK: atomicrmw.end: 1147; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 1148; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half 1149; CHECK-NEXT: ret half [[TMP7]] 1150; 1151 %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value syncscope("agent") seq_cst, align 4 1152 ret half %res 1153} 1154 1155define half @test_atomicrmw_xchg_f16_flat_agent(ptr %ptr, half %value) { 1156; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_agent( 1157; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 1158; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 1159; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 1160; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 1161; CHECK-NEXT: 
[[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 1162; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 1163; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1164; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16 1165; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 1166; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 1167; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 1168; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1169; CHECK: atomicrmw.start: 1170; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1171; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1172; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 1173; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4 1174; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 1175; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 1176; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1177; CHECK: atomicrmw.end: 1178; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 1179; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 1180; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half 1181; CHECK-NEXT: ret half [[TMP9]] 1182; 1183 %res = atomicrmw xchg ptr %ptr, half %value syncscope("agent") seq_cst 1184 ret half %res 1185} 1186 1187define half @test_atomicrmw_xchg_f16_flat_agent_align4(ptr %ptr, half %value) { 1188; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_agent_align4( 1189; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16 1190; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 1191; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4 1192; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1193; CHECK: atomicrmw.start: 1194; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ 
[[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1195; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 1196; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 1197; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4 1198; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1199; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1200; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1201; CHECK: atomicrmw.end: 1202; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 1203; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half 1204; CHECK-NEXT: ret half [[TMP7]] 1205; 1206 %res = atomicrmw xchg ptr %ptr, half %value syncscope("agent") seq_cst, align 4 1207 ret half %res 1208} 1209 1210define bfloat @test_atomicrmw_xchg_bf16_global_agent(ptr addrspace(1) %ptr, bfloat %value) { 1211; CHECK-LABEL: @test_atomicrmw_xchg_bf16_global_agent( 1212; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) 1213; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 1214; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 1215; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 1216; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 1217; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 1218; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1219; CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16 1220; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 1221; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 1222; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 1223; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1224; CHECK: atomicrmw.start: 1225; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 
1226; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1227; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 1228; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4 1229; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 1230; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 1231; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1232; CHECK: atomicrmw.end: 1233; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 1234; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 1235; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat 1236; CHECK-NEXT: ret bfloat [[TMP9]] 1237; 1238 %res = atomicrmw xchg ptr addrspace(1) %ptr, bfloat %value syncscope("agent") seq_cst 1239 ret bfloat %res 1240} 1241 1242define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %ptr, bfloat %value) { 1243; CHECK-LABEL: @test_atomicrmw_xchg_bf16_global_agent_align4( 1244; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16 1245; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 1246; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 1247; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1248; CHECK: atomicrmw.start: 1249; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1250; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 1251; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 1252; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4 1253; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1254; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1255; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 
1256; CHECK: atomicrmw.end: 1257; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 1258; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat 1259; CHECK-NEXT: ret bfloat [[TMP7]] 1260; 1261 %res = atomicrmw xchg ptr addrspace(1) %ptr, bfloat %value syncscope("agent") seq_cst, align 4 1262 ret bfloat %res 1263} 1264 1265define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) { 1266; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent( 1267; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4) 1268; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32 1269; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 1270; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 1271; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 1272; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1273; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 1274; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]] 1275; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4 1276; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1277; CHECK: atomicrmw.start: 1278; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1279; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1280; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] 1281; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4 1282; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 1283; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 1284; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1285; CHECK: atomicrmw.end: 1286; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] 1287; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc 
i32 [[SHIFTED]] to i16 1288; CHECK-NEXT: ret i16 [[EXTRACTED]] 1289; 1290 %res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst 1291 ret i16 %res 1292} 1293 1294define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) { 1295; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent_align4( 1296; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 1297; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4 1298; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1299; CHECK: atomicrmw.start: 1300; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1301; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[LOADED]], -65536 1302; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]] 1303; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4 1304; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 1305; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 1306; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1307; CHECK: atomicrmw.end: 1308; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 1309; CHECK-NEXT: ret i16 [[EXTRACTED]] 1310; 1311 %res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4 1312 ret i16 %res 1313} 1314 1315define i16 @test_atomicrmw_add_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) { 1316; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent( 1317; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4) 1318; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32 1319; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 1320; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 1321; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] 1322; CHECK-NEXT: 
[[INV_MASK:%.*]] = xor i32 [[MASK]], -1 1323; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 1324; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]] 1325; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4 1326; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1327; CHECK: atomicrmw.start: 1328; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1329; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] 1330; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] 1331; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 1332; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] 1333; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4 1334; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 1335; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 1336; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1337; CHECK: atomicrmw.end: 1338; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] 1339; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 1340; CHECK-NEXT: ret i16 [[EXTRACTED]] 1341; 1342 %res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst 1343 ret i16 %res 1344} 1345 1346define i16 @test_atomicrmw_add_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) { 1347; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent_align4( 1348; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 1349; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4 1350; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 1351; CHECK: atomicrmw.start: 1352; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 1353; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]] 1354; 
CHECK-NEXT: [[TMP3:%.*]] = and i32 [[NEW]], 65535 1355; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 1356; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]] 1357; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4 1358; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 1359; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 1360; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1361; CHECK: atomicrmw.end: 1362; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 1363; CHECK-NEXT: ret i16 [[EXTRACTED]] 1364; 1365 %res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4 1366 ret i16 %res 1367} 1368 1369!0 = !{} 1370!1 = !{!"foo", !"bar"} 1371!2 = !{!3} 1372!3 = distinct !{!3, !4} 1373!4 = distinct !{!4} 1374!5 = !{i64 0, i64 4, !1, i64 8, i64 4} 1375!6 = !{!7, !7, i64 0} 1376!7 = !{!"omnipotent char", !8, i64 0} 1377!8 = !{!"Simple C/C++ TBAA"} 1378 1379;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 1380; BASE: {{.*}} 1381; GCN: {{.*}} 1382; GFX940: {{.*}} 1383; R600: {{.*}} 1384