; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck %s

; Exercises the atomic-expand pass on 16-bit atomicrmw and cmpxchg operations.
; In every expected output in this part of the file the i16 operation is
; rewritten to act on an i32: either a 32-bit cmpxchg loop or a single 32-bit
; atomicrmw. Functions whose name ends in _align4 put an explicit `align 4` on
; the atomic instruction; the other functions give no alignment and are
; expected to round the pointer down with llvm.ptrmask and shift the 16-bit
; lane into place.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; xchg of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: pointer masked with -4, shift amount derived from the low two
; address bits, then an i32 cmpxchg loop that replaces only the masked lane.
define i16 @test_atomicrmw_xchg_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Same xchg with `align 4` on the instruction.
; Expected: no ptrmask or shift computation; the loop works on the original
; pointer and clears the low 16 bits with the constant -65536.
define i16 @test_atomicrmw_xchg_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
  ret i16 %res
}

; add of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: an i32 add of the shifted operand inside the loop, with the sum
; masked to the 16-bit lane and merged with the untouched bits before the
; i32 cmpxchg.
define i16 @test_atomicrmw_add_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Same add with `align 4` on the instruction.
; Expected: constant masks 65535 / -65536 and no address arithmetic.
define i16 @test_atomicrmw_add_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[NEW]], 65535
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
  ret i16 %res
}

; sub of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: same shape as the add case with an i32 sub in the loop.
define i16 @test_atomicrmw_sub_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_sub_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw sub ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; and of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: no loop. A single i32 `atomicrmw and` whose operand is the shifted
; value or'd with the inverted mask, so bits outside the 16-bit lane are all
; ones in the operand.
define i16 @test_atomicrmw_and_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; nand of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: an i32 cmpxchg loop computing `and` followed by `xor -1`, with the
; result masked to the 16-bit lane before it is merged back.
define i16 @test_atomicrmw_nand_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP5]], -1
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw nand ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; or of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: no loop. A single i32 `atomicrmw or` with the zero-extended,
; shifted operand.
define i16 @test_atomicrmw_or_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_or_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw or ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; xor of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: no loop. A single i32 `atomicrmw xor` with the zero-extended,
; shifted operand.
define i16 @test_atomicrmw_xor_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xor_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
  %res = atomicrmw xor ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Signed max of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: the loop extracts the 16-bit lane, picks the larger value with
; `icmp sgt` + select on i16, and reinserts it before the i32 cmpxchg.
define i16 @test_atomicrmw_max_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_max_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw max ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Signed min; same shape as the max case with `icmp sle` as the comparison.
define i16 @test_atomicrmw_min_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_min_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw min ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Unsigned max; same shape as the max case with `icmp ugt` as the comparison.
define i16 @test_atomicrmw_umax_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umax_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Unsigned min; same shape as the max case with `icmp ule` as the comparison.
define i16 @test_atomicrmw_umin_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umin_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; i16 cmpxchg on an addrspace(1) pointer produced by a getelementptr of four
; i16 elements, no explicit alignment.
; Expected: both the new and the compare value are zero-extended and shifted
; into the lane, and a partword loop issues an i32 cmpxchg. On failure the
; loop is re-entered only when the bits outside the lane differ from the
; previous iteration's value; otherwise control falls through to the end block.
define i16 @test_cmpxchg_i16_global_system(ptr addrspace(1) %out, i16 %in, i16 %old) {
; CHECK-LABEL: @test_cmpxchg_i16_global_system(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
; CHECK: partword.cmpxchg.loop:
; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
; CHECK: partword.cmpxchg.failure:
; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
; CHECK: partword.cmpxchg.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
; CHECK-NEXT: ret i16 [[EXTRACT]]
;
  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst
  %extract = extractvalue {i16, i1} %res, 0
  ret i16 %extract
}

; Same cmpxchg with `align 4` on the instruction.
; Expected: the partword loop operates directly on the getelementptr result
; with the constant mask -65536 and no shifts.
define i16 @test_cmpxchg_i16_global_system_align4(ptr addrspace(1) %out, i16 %in, i16 %old) {
; CHECK-LABEL: @test_cmpxchg_i16_global_system_align4(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -65536
; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
; CHECK: partword.cmpxchg.loop:
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[GEP]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
; CHECK: partword.cmpxchg.failure:
; CHECK-NEXT: [[TMP11]] = and i32 [[TMP9]], -65536
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
; CHECK: partword.cmpxchg.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1
; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0
; CHECK-NEXT: ret i16 [[EXTRACT]]
;
  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst, align 4
  %extract = extractvalue {i16, i1} %res, 0
  ret i16 %extract
}

; uinc_wrap of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: the loop extracts the lane, computes lane + 1, and selects 0
; instead when the old lane value is unsigned >= %value.
define i16 @test_atomicrmw_inc_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

; Same uinc_wrap with `align 4` on the instruction.
; Expected: the lane is taken by a plain trunc and reinserted with the
; constant mask -65536; no address arithmetic.
define i16 @test_atomicrmw_inc_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
  ret i16 %res
}

; uinc_wrap through a plain `ptr` (no addrspace qualifier), no explicit
; alignment. Expected: the same expansion as the addrspace(1) case, using
; the llvm.ptrmask.p0.i64 overload.
define i16 @test_atomicrmw_inc_i16_flat_system(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst
  ret i16 %res
}

; uinc_wrap through a plain `ptr` with `align 4` on the instruction.
; Expected: constant-mask form, operating on the original pointer.
define i16 @test_atomicrmw_inc_i16_flat_system_align4(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst, align 4
  ret i16 %res
}

; udec_wrap of an i16 through an addrspace(1) pointer, no explicit alignment.
; Expected: the loop computes lane - 1, and selects %value instead when the
; old lane value is 0 or unsigned > %value.
define i16 @test_atomicrmw_dec_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
  ret i16 %res
}

define i16 @test_atomicrmw_dec_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
;
CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 621; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 622; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 623; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 624; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 625; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 626; CHECK: atomicrmw.end: 627; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 628; CHECK-NEXT: ret i16 [[EXTRACTED1]] 629; 630 %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4 631 ret i16 %res 632} 633 634define i16 @test_atomicrmw_dec_i16_flat_system(ptr %ptr, i16 %value) { 635; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system( 636; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 637; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 638; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 639; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 640; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 641; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 642; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 643; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 644; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 645; CHECK: atomicrmw.start: 646; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 647; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] 648; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 649; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 650; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 651; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 652; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 653; CHECK-NEXT: [[NEW:%.*]] = select i1 
[[TMP7]], i16 [[VALUE]], i16 [[TMP4]] 654; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 655; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] 656; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 657; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] 658; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 659; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 660; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 661; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 662; CHECK: atomicrmw.end: 663; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 664; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 665; CHECK-NEXT: ret i16 [[EXTRACTED3]] 666; 667 %res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst 668 ret i16 %res 669} 670 671define i16 @test_atomicrmw_dec_i16_flat_system_align4(ptr %ptr, i16 %value) { 672; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system_align4( 673; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 674; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 675; CHECK: atomicrmw.start: 676; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 677; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 678; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 679; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 680; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] 681; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 682; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] 683; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 684; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 685; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] 686; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr 
[[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 687; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 688; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 689; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 690; CHECK: atomicrmw.end: 691; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 692; CHECK-NEXT: ret i16 [[EXTRACTED1]] 693; 694 %res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst, align 4 695 ret i16 %res 696} 697 698define half @test_atomicrmw_xchg_f16_global_system(ptr addrspace(1) %ptr, half %value) { 699; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system( 700; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) 701; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 702; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 703; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 704; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 705; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 706; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 707; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16 708; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 709; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 710; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 711; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 712; CHECK: atomicrmw.start: 713; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 714; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 715; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 716; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 717; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 718; CHECK-NEXT: 
[[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 719; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 720; CHECK: atomicrmw.end: 721; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 722; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 723; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half 724; CHECK-NEXT: ret half [[TMP9]] 725; 726 %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst 727 ret half %res 728} 729 730define half @test_atomicrmw_xchg_f16_global_system_align4(ptr addrspace(1) %ptr, half %value) { 731; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system_align4( 732; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16 733; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 734; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 735; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 736; CHECK: atomicrmw.start: 737; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 738; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 739; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 740; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4 741; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 742; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 743; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 744; CHECK: atomicrmw.end: 745; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 746; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half 747; CHECK-NEXT: ret half [[TMP7]] 748; 749 %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst, align 4 750 ret half %res 751} 752 753define half @test_atomicrmw_xchg_f16_flat_system(ptr %ptr, half %value) { 754; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system( 755; CHECK-NEXT: 
[[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 756; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 757; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 758; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 759; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 760; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 761; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 762; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16 763; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 764; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 765; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 766; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 767; CHECK: atomicrmw.start: 768; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 769; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 770; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 771; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 772; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 773; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 774; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 775; CHECK: atomicrmw.end: 776; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 777; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 778; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half 779; CHECK-NEXT: ret half [[TMP9]] 780; 781 %res = atomicrmw xchg ptr %ptr, half %value seq_cst 782 ret half %res 783} 784 785define half @test_atomicrmw_xchg_f16_flat_system_align4(ptr %ptr, half %value) { 786; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system_align4( 787; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16 788; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 
789; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4 790; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 791; CHECK: atomicrmw.start: 792; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 793; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 794; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 795; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4 796; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 797; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 798; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 799; CHECK: atomicrmw.end: 800; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 801; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half 802; CHECK-NEXT: ret half [[TMP7]] 803; 804 %res = atomicrmw xchg ptr %ptr, half %value seq_cst, align 4 805 ret half %res 806} 807 808define bfloat @test_atomicrmw_xchg_bf16_flat_system(ptr %ptr, bfloat %value) { 809; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system( 810; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) 811; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 812; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 813; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 814; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 815; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] 816; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 817; CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16 818; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 819; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] 820; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 821; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 822; CHECK: atomicrmw.start: 823; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], 
[[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 824; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] 825; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] 826; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 827; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 828; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 829; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 830; CHECK: atomicrmw.end: 831; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] 832; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 833; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat 834; CHECK-NEXT: ret bfloat [[TMP9]] 835; 836 %res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst 837 ret bfloat %res 838} 839 840define bfloat @test_atomicrmw_xchg_bf16_flat_system_align4(ptr %ptr, bfloat %value) { 841; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system_align4( 842; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16 843; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 844; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4 845; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] 846; CHECK: atomicrmw.start: 847; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] 848; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536 849; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] 850; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4 851; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 852; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 853; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 854; CHECK: atomicrmw.end: 855; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16 856; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat 857; CHECK-NEXT: ret bfloat [[TMP7]] 858; 859 %res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst, align 4 860 ret bfloat %res 861} 862