1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s 3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s 4; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s 5; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s 6; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s 7; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s 8; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s 9; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s 10 11;--------------------------------------------------------------------- 12; atomicrmw xchg 13;--------------------------------------------------------------------- 14 15; xchg is supported over PCIe, so no expansion is necessary 16define float @test_atomicrmw_xchg_f32_global_system(ptr addrspace(1) %ptr, float %value) { 17; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system( 18; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { 19; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4 20; COMMON-NEXT: ret float [[RES]] 21; 22 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst 23 ret float %res 24} 25 26; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. 27define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 28; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory( 29; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 30; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] 31; COMMON-NEXT: ret float [[RES]] 32; 33 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 34 ret float %res 35} 36 37; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. 38define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 39; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_remote_memory( 40; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 41; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 42; COMMON-NEXT: ret float [[RES]] 43; 44 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 45 ret float %res 46} 47 48; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. 49define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 50; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 51; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 52; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 53; COMMON-NEXT: ret float [[RES]] 54; 55 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 56 ret float %res 57} 58 59;--------------------------------------------------------------------- 60; atomicrmw fadd 61;--------------------------------------------------------------------- 62 63define float @test_atomicrmw_fadd_f32_global_system(ptr addrspace(1) %ptr, float %value) { 64; COMMON-LABEL: define float @test_atomicrmw_fadd_f32_global_system( 65; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 66; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 67; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 68; COMMON: atomicrmw.start: 69; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 70; COMMON-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 71; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 72; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 73; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 74; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 75; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 76; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 77; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 78; COMMON: atomicrmw.end: 79; COMMON-NEXT: ret float [[TMP5]] 80; 81 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst 82 ret float %res 83} 84 85define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 86; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 87; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 88; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 89; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 90; GFX803: atomicrmw.start: 91; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 92; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 93; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 94; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 95; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 96; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 97; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 98; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 99; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 100; GFX803: atomicrmw.end: 101; GFX803-NEXT: ret float [[TMP5]] 102; 103; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 104; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 105; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 106; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 107; GFX906: atomicrmw.start: 108; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 109; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 110; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 111; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 112; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 113; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 114; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 115; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 116; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 117; GFX906: atomicrmw.end: 118; GFX906-NEXT: ret float [[TMP5]] 119; 120; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 121; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 122; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 123; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 124; GFX908: atomicrmw.start: 125; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 126; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 127; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 128; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 129; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 130; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 131; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 132; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 133; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 134; GFX908: atomicrmw.end: 135; GFX908-NEXT: ret float [[TMP5]] 136; 137; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 138; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 139; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 140; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 141; GFX90A: atomicrmw.start: 142; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 143; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 144; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 145; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 146; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 147; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 148; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 149; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 150; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 151; GFX90A: atomicrmw.end: 152; GFX90A-NEXT: ret float [[TMP5]] 153; 154; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 155; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 156; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 157; GFX940-NEXT: ret float [[RES]] 158; 159; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 160; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 161; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 162; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 163; GFX10: atomicrmw.start: 164; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 165; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 166; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 167; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 168; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 169; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 170; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 171; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 172; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 173; GFX10: atomicrmw.end: 174; GFX10-NEXT: ret float [[TMP5]] 175; 176; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 177; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 178; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 179; GFX11-NEXT: ret float [[TMP5]] 180; 181; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( 182; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 183; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 184; GFX12-NEXT: ret float [[TMP5]] 185; 186 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 187 ret float %res 188} 189 190define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 191; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 192; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 193; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 194; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 195; GFX803: atomicrmw.start: 196; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 197; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 198; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 199; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 200; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 201; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 202; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 203; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 204; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 205; GFX803: atomicrmw.end: 206; GFX803-NEXT: ret float [[TMP5]] 207; 208; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 209; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 210; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 211; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 212; GFX906: atomicrmw.start: 213; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 214; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 215; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 216; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 217; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 218; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 219; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 220; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 221; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 222; GFX906: atomicrmw.end: 223; GFX906-NEXT: ret float [[TMP5]] 224; 225; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 226; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 227; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 228; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 229; GFX908: atomicrmw.start: 230; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 231; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 232; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 233; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 234; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 235; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 236; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 237; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 238; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 239; GFX908: atomicrmw.end: 240; GFX908-NEXT: ret float [[TMP5]] 241; 242; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 243; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 244; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 245; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 246; GFX90A: atomicrmw.start: 247; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 248; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 249; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 250; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 251; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 252; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 253; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 254; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 255; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 256; GFX90A: atomicrmw.end: 257; GFX90A-NEXT: ret float [[TMP5]] 258; 259; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 260; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 261; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 262; GFX940-NEXT: ret float [[RES]] 263; 264; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 265; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 266; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 267; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 268; GFX10: atomicrmw.start: 269; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 270; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 271; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 272; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 273; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 274; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 275; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 276; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 277; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 278; GFX10: atomicrmw.end: 279; GFX10-NEXT: ret float [[TMP5]] 280; 281; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 282; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 283; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 284; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 285; GFX11: atomicrmw.start: 286; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 287; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 288; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 289; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 290; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 291; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 292; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 293; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 294; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 295; GFX11: atomicrmw.end: 296; GFX11-NEXT: ret float [[TMP5]] 297; 298; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( 299; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 300; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 301; GFX12-NEXT: ret float [[TMP5]] 302; 303 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 304 ret float %res 305} 306 307define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 308; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 309; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 310; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 311; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 312; GFX803: atomicrmw.start: 313; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 314; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 315; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 316; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 317; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 318; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 319; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 320; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 321; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 322; GFX803: atomicrmw.end: 323; GFX803-NEXT: ret float [[TMP5]] 324; 325; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 326; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 327; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 328; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 329; GFX906: atomicrmw.start: 330; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 331; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 332; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 333; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 334; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 335; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 336; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 337; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 338; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 339; GFX906: atomicrmw.end: 340; GFX906-NEXT: ret float [[TMP5]] 341; 342; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 343; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 344; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 345; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 346; GFX908: atomicrmw.start: 347; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 348; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 349; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 350; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 351; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 352; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 353; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 354; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 355; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 356; GFX908: atomicrmw.end: 357; GFX908-NEXT: ret float [[TMP5]] 358; 359; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 360; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 361; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 362; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 363; GFX90A: atomicrmw.start: 364; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 365; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 366; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 367; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 368; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 369; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 370; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 371; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 372; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 373; GFX90A: atomicrmw.end: 374; GFX90A-NEXT: ret float [[TMP5]] 375; 376; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 377; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 378; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 379; GFX940-NEXT: ret float [[RES]] 380; 381; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 382; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 383; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 384; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 385; GFX10: atomicrmw.start: 386; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 387; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 388; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 389; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 390; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 391; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 392; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 393; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 394; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 395; GFX10: atomicrmw.end: 396; GFX10-NEXT: ret float [[TMP5]] 397; 398; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 399; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 400; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 401; GFX11-NEXT: ret float [[TMP5]] 402; 403; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 404; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 405; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 406; GFX12-NEXT: ret float [[TMP5]] 407; 408 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 409 ret float %res 410} 411 412define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 { 413; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 414; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 415; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 416; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 417; GFX803: atomicrmw.start: 418; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 419; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 420; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 421; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 422; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 423; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 424; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 425; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 426; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 427; GFX803: atomicrmw.end: 428; GFX803-NEXT: ret float [[TMP5]] 429; 430; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 431; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 432; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 433; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 434; GFX906: atomicrmw.start: 435; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 436; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 437; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 438; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 439; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 440; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 441; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 442; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 443; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 444; GFX906: atomicrmw.end: 445; GFX906-NEXT: ret float [[TMP5]] 446; 447; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 448; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 449; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 450; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 451; GFX908: atomicrmw.start: 452; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 453; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 454; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 455; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 456; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 457; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 458; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 459; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 460; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 461; GFX908: atomicrmw.end: 462; GFX908-NEXT: ret float [[TMP5]] 463; 464; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 465; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 466; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 467; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 468; GFX90A: atomicrmw.start: 469; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 470; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 471; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 472; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 473; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 474; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 475; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 476; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 477; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 478; GFX90A: atomicrmw.end: 479; GFX90A-NEXT: ret float [[TMP5]] 480; 481; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 482; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 483; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 484; GFX940-NEXT: ret float [[RES]] 485; 486; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 487; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 488; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 489; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 490; GFX10: atomicrmw.start: 491; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 492; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 493; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 494; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 495; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 496; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 497; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 498; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 499; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 500; GFX10: atomicrmw.end: 501; GFX10-NEXT: ret float [[TMP5]] 502; 503; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 504; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 505; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 506; GFX11-NEXT: ret float [[TMP5]] 507; 508; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 509; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { 510; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 511; GFX12-NEXT: ret float [[TMP5]] 512; 513 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 514 ret float %res 515} 516 517define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 518; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 519; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 520; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 521; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 522; GFX803: atomicrmw.start: 523; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 524; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 525; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 526; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 527; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 528; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 529; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 530; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 531; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 532; GFX803: atomicrmw.end: 533; GFX803-NEXT: ret float [[TMP5]] 534; 535; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 536; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 537; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 538; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 539; GFX906: atomicrmw.start: 540; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 541; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 542; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 543; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 544; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 545; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 546; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 547; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 548; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 549; GFX906: atomicrmw.end: 550; GFX906-NEXT: ret float [[TMP5]] 551; 552; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 553; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 554; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 555; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 556; GFX908: atomicrmw.start: 557; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 558; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 559; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 560; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 561; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 562; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 563; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 564; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 565; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 566; GFX908: atomicrmw.end: 567; GFX908-NEXT: ret float [[TMP5]] 568; 569; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 570; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 571; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 572; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 573; GFX90A: atomicrmw.start: 574; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 575; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 576; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 577; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 578; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 579; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 580; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 581; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 582; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 583; GFX90A: atomicrmw.end: 584; GFX90A-NEXT: ret float [[TMP5]] 585; 586; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 587; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 588; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 589; GFX940-NEXT: ret float [[RES]] 590; 591; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 592; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 593; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 594; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 595; GFX10: atomicrmw.start: 596; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 597; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 598; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 599; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 600; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 601; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 602; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 603; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 604; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 605; GFX10: atomicrmw.end: 606; GFX10-NEXT: ret float [[TMP5]] 607; 608; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 609; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 610; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 611; GFX11-NEXT: ret float [[TMP5]] 612; 613; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 614; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 615; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 616; GFX12-NEXT: ret float [[TMP5]] 617; 618 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 619 ret float %res 620} 621 622define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 623; COMMON-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode( 624; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 625; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 626; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 627; COMMON: atomicrmw.start: 628; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 629; COMMON-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 630; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 631; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 632; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 633; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 634; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 635; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 636; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 637; COMMON: atomicrmw.end: 638; COMMON-NEXT: ret float [[TMP5]] 639; 640 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 641 ret float %res 642} 643 644define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 645; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 646; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 647; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 648; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 649; GFX803: atomicrmw.start: 650; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 651; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 652; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 653; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 654; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 655; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 656; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 657; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 658; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 659; GFX803: atomicrmw.end: 660; GFX803-NEXT: ret float [[TMP5]] 661; 662; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 663; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 664; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 665; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 666; GFX906: atomicrmw.start: 667; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 668; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 669; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 670; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 671; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 672; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 673; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 674; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 675; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 676; GFX906: atomicrmw.end: 677; GFX906-NEXT: ret float [[TMP5]] 678; 679; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 680; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 681; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 682; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 683; GFX908: atomicrmw.start: 684; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 685; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 686; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 687; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 688; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 689; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 690; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 691; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 692; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 693; GFX908: atomicrmw.end: 694; GFX908-NEXT: ret float [[TMP5]] 695; 696; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 697; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 698; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 699; GFX90A-NEXT: ret float [[TMP5]] 700; 701; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 702; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 703; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 704; GFX940-NEXT: ret float [[RES]] 705; 706; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 707; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 708; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 709; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 710; GFX10: atomicrmw.start: 711; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 712; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 713; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 714; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 715; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 716; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 717; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 718; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 719; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 720; GFX10: atomicrmw.end: 721; GFX10-NEXT: ret float [[TMP5]] 722; 723; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 724; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 725; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 726; GFX11-NEXT: ret float [[TMP5]] 727; 728; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 729; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 730; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 731; GFX12-NEXT: ret float [[TMP5]] 732; 733 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 734 ret float %res 735} 736 737define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 738; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 739; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 740; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 741; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 742; GFX803: atomicrmw.start: 743; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 744; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 745; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 746; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 747; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 748; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 749; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 750; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 751; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 752; GFX803: atomicrmw.end: 753; GFX803-NEXT: ret float [[TMP5]] 754; 755; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 756; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 757; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 758; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 759; GFX906: atomicrmw.start: 760; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 761; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 762; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 763; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 764; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 765; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 766; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 767; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 768; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 769; GFX906: atomicrmw.end: 770; GFX906-NEXT: ret float [[TMP5]] 771; 772; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 773; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 774; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 775; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 776; GFX908: atomicrmw.start: 777; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 778; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 779; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 780; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 781; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 782; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 783; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 784; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 785; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 786; GFX908: atomicrmw.end: 787; GFX908-NEXT: ret float [[TMP5]] 788; 789; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 790; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 791; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 792; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 793; GFX90A: atomicrmw.start: 794; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 795; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 796; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 797; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 798; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 799; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 800; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 801; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 802; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 803; GFX90A: atomicrmw.end: 804; GFX90A-NEXT: ret float [[TMP5]] 805; 806; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 807; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 808; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 809; GFX940-NEXT: ret float [[RES]] 810; 811; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 812; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 813; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 814; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 815; GFX10: atomicrmw.start: 816; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 817; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 818; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 819; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 820; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 821; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 822; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 823; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 824; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 825; GFX10: atomicrmw.end: 826; GFX10-NEXT: ret float [[TMP5]] 827; 828; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 829; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 830; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 831; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 832; GFX11: atomicrmw.start: 833; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 834; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 835; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 836; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 837; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 838; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 839; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 840; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 841; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 842; GFX11: atomicrmw.end: 843; GFX11-NEXT: ret float [[TMP5]] 844; 845; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 846; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 847; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 848; GFX12-NEXT: ret float [[TMP5]] 849; 850 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 851 ret float %res 852} 853 854define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 855; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 856; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 857; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 858; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 859; GFX803: atomicrmw.start: 860; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 861; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 862; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 863; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 864; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 865; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 866; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 867; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 868; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 869; GFX803: atomicrmw.end: 870; GFX803-NEXT: ret float [[TMP5]] 871; 872; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 873; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 874; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 875; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 876; GFX906: atomicrmw.start: 877; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 878; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 879; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 880; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 881; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 882; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 883; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 884; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 885; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 886; GFX906: atomicrmw.end: 887; GFX906-NEXT: ret float [[TMP5]] 888; 889; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 890; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 891; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 892; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 893; GFX908: atomicrmw.start: 894; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 895; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 896; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 897; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 898; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 899; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 900; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 901; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 902; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 903; GFX908: atomicrmw.end: 904; GFX908-NEXT: ret float [[TMP5]] 905; 906; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 907; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 908; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 909; GFX90A-NEXT: ret float [[TMP5]] 910; 911; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 912; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 913; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 914; GFX940-NEXT: ret float [[RES]] 915; 916; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 917; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 918; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 919; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 920; GFX10: atomicrmw.start: 921; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 922; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 923; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 924; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 925; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 926; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 927; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 928; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 929; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 930; GFX10: atomicrmw.end: 931; GFX10-NEXT: ret float [[TMP5]] 932; 933; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 934; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 935; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 936; GFX11-NEXT: ret float [[TMP5]] 937; 938; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 939; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 940; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 941; GFX12-NEXT: ret float [[TMP5]] 942; 943 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 944 ret float %res 945} 946 947define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 { 948; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 949; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 950; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 951; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 952; GFX803: atomicrmw.start: 953; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 954; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 955; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 956; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 957; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 958; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 959; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 960; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 961; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 962; GFX803: atomicrmw.end: 963; GFX803-NEXT: ret float [[TMP5]] 964; 965; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 966; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 967; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 968; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 969; GFX906: atomicrmw.start: 970; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 971; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 972; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 973; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 974; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 975; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 976; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 977; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 978; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 979; GFX906: atomicrmw.end: 980; GFX906-NEXT: ret float [[TMP5]] 981; 982; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 983; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 984; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 985; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 986; GFX908: atomicrmw.start: 987; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 988; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 989; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 990; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 991; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 992; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 993; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 994; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 995; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 996; GFX908: atomicrmw.end: 997; GFX908-NEXT: ret float [[TMP5]] 998; 999; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1000; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1001; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1002; GFX90A-NEXT: ret float [[TMP5]] 1003; 1004; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1005; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1006; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1007; GFX940-NEXT: ret float [[RES]] 1008; 1009; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1010; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1011; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1012; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1013; GFX10: atomicrmw.start: 1014; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1015; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1016; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1017; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1018; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1019; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1020; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1021; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1022; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1023; GFX10: atomicrmw.end: 1024; GFX10-NEXT: ret float [[TMP5]] 1025; 1026; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1027; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1028; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1029; GFX11-NEXT: ret float [[TMP5]] 1030; 1031; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1032; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1033; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1034; GFX12-NEXT: ret float [[TMP5]] 1035; 1036 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1037 ret float %res 1038} 1039 1040define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 1041; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1042; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1043; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1044; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1045; GFX803: atomicrmw.start: 1046; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1047; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1048; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1049; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1050; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1051; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1052; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1053; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1054; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1055; GFX803: atomicrmw.end: 1056; GFX803-NEXT: ret float [[TMP5]] 1057; 1058; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1059; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1060; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1061; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1062; GFX906: atomicrmw.start: 1063; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1064; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1065; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1066; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1067; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1068; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1069; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1070; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1071; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1072; GFX906: atomicrmw.end: 1073; GFX906-NEXT: ret float [[TMP5]] 1074; 1075; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1076; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1077; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1078; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1079; GFX908: atomicrmw.start: 1080; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1081; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1082; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1083; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1084; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1085; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1086; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1087; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1088; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1089; GFX908: atomicrmw.end: 1090; GFX908-NEXT: ret float [[TMP5]] 1091; 1092; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1093; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1094; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1095; GFX90A-NEXT: ret float [[TMP5]] 1096; 1097; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1098; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1099; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1100; GFX940-NEXT: ret float [[RES]] 1101; 1102; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1103; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1104; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1105; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1106; GFX10: atomicrmw.start: 1107; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1108; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1109; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1110; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1111; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1112; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1113; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1114; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1115; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1116; GFX10: atomicrmw.end: 1117; GFX10-NEXT: ret float [[TMP5]] 1118; 1119; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1120; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1121; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1122; GFX11-NEXT: ret float [[TMP5]] 1123; 1124; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1125; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1126; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1127; GFX12-NEXT: ret float [[TMP5]] 1128; 1129 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1130 ret float %res 1131} 1132 1133;--------------------------------------------------------------------- 1134; atomicrmw fadd (no return) 1135;--------------------------------------------------------------------- 1136 1137define void @test_atomicrmw_fadd_noret_f32_global_system(ptr addrspace(1) %ptr, float %value) { 1138; COMMON-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system( 1139; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1140; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1141; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 1142; COMMON: atomicrmw.start: 1143; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1144; COMMON-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1145; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1146; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1147; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 1148; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1149; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1150; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1151; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1152; COMMON: atomicrmw.end: 1153; COMMON-NEXT: ret void 1154; 1155 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst 1156 ret void 1157} 1158 1159define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 1160; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1161; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1162; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1163; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1164; GFX803: atomicrmw.start: 1165; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1166; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1167; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1168; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1169; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1170; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1171; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1172; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1173; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1174; GFX803: atomicrmw.end: 1175; GFX803-NEXT: ret void 1176; 1177; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1178; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1179; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1180; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1181; GFX906: atomicrmw.start: 1182; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1183; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1184; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1185; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1186; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1187; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1188; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1189; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1190; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1191; GFX906: atomicrmw.end: 1192; GFX906-NEXT: ret void 1193; 1194; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1195; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1196; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1197; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1198; GFX908: atomicrmw.start: 1199; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1200; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1201; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1202; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1203; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1204; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1205; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1206; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1207; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1208; GFX908: atomicrmw.end: 1209; GFX908-NEXT: ret void 1210; 1211; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1212; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1213; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1214; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1215; GFX90A: atomicrmw.start: 1216; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1217; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1218; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1219; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1220; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1221; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1222; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1223; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1224; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1225; GFX90A: atomicrmw.end: 1226; GFX90A-NEXT: ret void 1227; 1228; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1229; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1230; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1231; GFX940-NEXT: ret void 1232; 1233; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1234; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1235; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1236; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1237; GFX10: atomicrmw.start: 1238; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1239; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1240; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1241; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1242; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1243; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1244; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1245; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1246; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1247; GFX10: atomicrmw.end: 1248; GFX10-NEXT: ret void 1249; 1250; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1251; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1252; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1253; GFX11-NEXT: ret void 1254; 1255; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( 1256; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1257; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1258; GFX12-NEXT: ret void 1259; 1260 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 1261 ret void 1262} 1263 1264define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1265; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1266; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1267; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1268; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1269; GFX803: atomicrmw.start: 1270; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1271; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1272; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1273; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1274; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1275; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1276; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1277; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1278; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1279; GFX803: atomicrmw.end: 1280; GFX803-NEXT: ret void 1281; 1282; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1283; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1284; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1285; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1286; GFX906: atomicrmw.start: 1287; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1288; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1289; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1290; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1291; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1292; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1293; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1294; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1295; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1296; GFX906: atomicrmw.end: 1297; GFX906-NEXT: ret void 1298; 1299; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1300; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1301; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1302; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1303; GFX908: atomicrmw.start: 1304; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1305; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1306; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1307; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1308; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1309; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1310; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1311; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1312; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1313; GFX908: atomicrmw.end: 1314; GFX908-NEXT: ret void 1315; 1316; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1317; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1318; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1319; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1320; GFX90A: atomicrmw.start: 1321; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1322; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1323; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1324; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1325; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1326; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1327; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1328; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1329; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1330; GFX90A: atomicrmw.end: 1331; GFX90A-NEXT: ret void 1332; 1333; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1334; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1335; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1336; GFX940-NEXT: ret void 1337; 1338; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1339; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1340; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1341; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1342; GFX10: atomicrmw.start: 1343; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1344; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1345; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1346; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1347; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1348; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1349; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1350; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1351; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1352; GFX10: atomicrmw.end: 1353; GFX10-NEXT: ret void 1354; 1355; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1356; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1357; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1358; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 1359; GFX11: atomicrmw.start: 1360; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1361; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1362; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1363; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1364; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1365; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1366; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1367; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1368; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1369; GFX11: atomicrmw.end: 1370; GFX11-NEXT: ret void 1371; 1372; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( 1373; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1374; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1375; GFX12-NEXT: ret void 1376; 1377 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 1378 ret void 1379} 1380 1381define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1382; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1383; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1384; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1385; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1386; GFX803: atomicrmw.start: 1387; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1388; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1389; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1390; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1391; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1392; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1393; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1394; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1395; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1396; GFX803: atomicrmw.end: 1397; GFX803-NEXT: ret void 1398; 1399; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1400; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1401; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1402; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1403; GFX906: atomicrmw.start: 1404; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1405; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1406; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1407; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1408; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1409; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1410; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1411; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1412; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1413; GFX906: atomicrmw.end: 1414; GFX906-NEXT: ret void 1415; 1416; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1417; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1418; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1419; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1420; GFX908: atomicrmw.start: 1421; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1422; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1423; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1424; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1425; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1426; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1427; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1428; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1429; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1430; GFX908: atomicrmw.end: 1431; GFX908-NEXT: ret void 1432; 1433; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1434; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1435; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1436; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1437; GFX90A: atomicrmw.start: 1438; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1439; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1440; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1441; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1442; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1443; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1444; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1445; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1446; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1447; GFX90A: atomicrmw.end: 1448; GFX90A-NEXT: ret void 1449; 1450; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1451; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1452; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1453; GFX940-NEXT: ret void 1454; 1455; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1456; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1457; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1458; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1459; GFX10: atomicrmw.start: 1460; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1461; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1462; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1463; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1464; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1465; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1466; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1467; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1468; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1469; GFX10: atomicrmw.end: 1470; GFX10-NEXT: ret void 1471; 1472; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1473; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1474; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1475; GFX11-NEXT: ret void 1476; 1477; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1478; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1479; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1480; GFX12-NEXT: ret void 1481; 1482 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 1483 ret void 1484} 1485 1486define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 { 1487; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1488; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1489; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1490; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1491; GFX803: atomicrmw.start: 1492; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1493; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1494; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1495; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1496; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1497; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1498; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1499; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1500; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1501; GFX803: atomicrmw.end: 1502; GFX803-NEXT: ret void 1503; 1504; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1505; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1506; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1507; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1508; GFX906: atomicrmw.start: 1509; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1510; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1511; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1512; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1513; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1514; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1515; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1516; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1517; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1518; GFX906: atomicrmw.end: 1519; GFX906-NEXT: ret void 1520; 1521; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1522; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1523; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1524; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1525; GFX908: atomicrmw.start: 1526; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1527; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1528; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1529; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1530; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1531; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1532; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1533; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1534; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1535; GFX908: atomicrmw.end: 1536; GFX908-NEXT: ret void 1537; 1538; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1539; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1540; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1541; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1542; GFX90A: atomicrmw.start: 1543; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1544; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1545; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1546; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1547; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1548; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1549; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1550; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1551; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1552; GFX90A: atomicrmw.end: 1553; GFX90A-NEXT: ret void 1554; 1555; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1556; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1557; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1558; GFX940-NEXT: ret void 1559; 1560; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1561; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1562; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1563; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1564; GFX10: atomicrmw.start: 1565; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1566; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1567; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1568; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1569; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1570; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1571; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1572; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1573; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1574; GFX10: atomicrmw.end: 1575; GFX10-NEXT: ret void 1576; 1577; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1578; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1579; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1580; GFX11-NEXT: ret void 1581; 1582; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1583; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1584; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1585; GFX12-NEXT: ret void 1586; 1587 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 1588 ret void 1589} 1590 1591define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 1592; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1593; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1594; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1595; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1596; GFX803: atomicrmw.start: 1597; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1598; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1599; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1600; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1601; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1602; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1603; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1604; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1605; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1606; GFX803: atomicrmw.end: 1607; GFX803-NEXT: ret void 1608; 1609; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1610; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1611; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1612; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1613; GFX906: atomicrmw.start: 1614; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1615; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1616; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1617; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1618; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1619; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1620; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1621; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1622; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1623; GFX906: atomicrmw.end: 1624; GFX906-NEXT: ret void 1625; 1626; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1627; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1628; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1629; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1630; GFX908: atomicrmw.start: 1631; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1632; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1633; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1634; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1635; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1636; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1637; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1638; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1639; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1640; GFX908: atomicrmw.end: 1641; GFX908-NEXT: ret void 1642; 1643; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1644; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1645; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1646; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1647; GFX90A: atomicrmw.start: 1648; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1649; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1650; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1651; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1652; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1653; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1654; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1655; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1656; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1657; GFX90A: atomicrmw.end: 1658; GFX90A-NEXT: ret void 1659; 1660; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1661; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1662; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1663; GFX940-NEXT: ret void 1664; 1665; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1666; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1667; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1668; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1669; GFX10: atomicrmw.start: 1670; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1671; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1672; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1673; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1674; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1675; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1676; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1677; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1678; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1679; GFX10: atomicrmw.end: 1680; GFX10-NEXT: ret void 1681; 1682; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1683; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1684; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1685; GFX11-NEXT: ret void 1686; 1687; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 1688; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1689; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1690; GFX12-NEXT: ret void 1691; 1692 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 1693 ret void 1694} 1695 1696define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 1697; COMMON-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode( 1698; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1699; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1700; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 1701; COMMON: atomicrmw.start: 1702; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1703; COMMON-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1704; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1705; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1706; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 1707; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1708; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1709; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1710; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1711; COMMON: atomicrmw.end: 1712; COMMON-NEXT: ret void 1713; 1714 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 1715 ret void 1716} 1717 1718define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 1719; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1720; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1721; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1722; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1723; GFX803: atomicrmw.start: 1724; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1725; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1726; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1727; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1728; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1729; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1730; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1731; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1732; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1733; GFX803: atomicrmw.end: 1734; GFX803-NEXT: ret void 1735; 1736; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1737; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1738; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1739; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1740; GFX906: atomicrmw.start: 1741; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1742; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1743; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1744; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1745; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1746; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1747; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1748; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1749; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1750; GFX906: atomicrmw.end: 1751; GFX906-NEXT: ret void 1752; 1753; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1754; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1755; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1756; GFX908-NEXT: ret void 1757; 1758; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1759; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1760; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1761; GFX90A-NEXT: ret void 1762; 1763; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1764; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1765; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1766; GFX940-NEXT: ret void 1767; 1768; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1769; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1770; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1771; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1772; GFX10: atomicrmw.start: 1773; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1774; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1775; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1776; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1777; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1778; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1779; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1780; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1781; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1782; GFX10: atomicrmw.end: 1783; GFX10-NEXT: ret void 1784; 1785; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1786; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1787; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1788; GFX11-NEXT: ret void 1789; 1790; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 1791; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1792; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1793; GFX12-NEXT: ret void 1794; 1795 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 1796 ret void 1797} 1798 1799define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1800; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1801; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1802; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1803; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1804; GFX803: atomicrmw.start: 1805; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1806; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1807; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1808; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1809; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1810; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1811; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1812; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1813; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1814; GFX803: atomicrmw.end: 1815; GFX803-NEXT: ret void 1816; 1817; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1818; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1819; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1820; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1821; GFX906: atomicrmw.start: 1822; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1823; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1824; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1825; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1826; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1827; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1828; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1829; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1830; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1831; GFX906: atomicrmw.end: 1832; GFX906-NEXT: ret void 1833; 1834; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1835; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1836; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1837; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1838; GFX908: atomicrmw.start: 1839; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1840; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1841; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1842; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1843; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1844; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1845; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1846; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1847; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1848; GFX908: atomicrmw.end: 1849; GFX908-NEXT: ret void 1850; 1851; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1852; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1853; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1854; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1855; GFX90A: atomicrmw.start: 1856; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1857; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1858; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1859; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1860; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1861; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1862; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1863; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1864; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1865; GFX90A: atomicrmw.end: 1866; GFX90A-NEXT: ret void 1867; 1868; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1869; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1870; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1871; GFX940-NEXT: ret void 1872; 1873; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1874; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1875; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1876; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1877; GFX10: atomicrmw.start: 1878; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1879; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1880; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1881; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1882; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1883; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1884; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1885; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1886; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1887; GFX10: atomicrmw.end: 1888; GFX10-NEXT: ret void 1889; 1890; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1891; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1892; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1893; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 1894; GFX11: atomicrmw.start: 1895; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1896; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1897; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1898; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1899; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1900; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1901; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1902; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1903; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1904; GFX11: atomicrmw.end: 1905; GFX11-NEXT: ret void 1906; 1907; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1908; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1909; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1910; GFX12-NEXT: ret void 1911; 1912 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1913 ret void 1914} 1915 1916define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1917; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1918; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1919; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1920; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1921; GFX803: atomicrmw.start: 1922; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1923; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1924; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1925; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1926; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1927; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1928; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1929; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1930; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1931; GFX803: atomicrmw.end: 1932; GFX803-NEXT: ret void 1933; 1934; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1935; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1936; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1937; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1938; GFX906: atomicrmw.start: 1939; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1940; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1941; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1942; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1943; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1944; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1945; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1946; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1947; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1948; GFX906: atomicrmw.end: 1949; GFX906-NEXT: ret void 1950; 1951; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1952; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1953; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1954; GFX908-NEXT: ret void 1955; 1956; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1957; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1958; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1959; GFX90A-NEXT: ret void 1960; 1961; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1962; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1963; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1964; GFX940-NEXT: ret void 1965; 1966; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1967; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1968; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1969; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1970; GFX10: atomicrmw.start: 1971; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1972; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1973; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1974; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1975; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1976; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1977; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1978; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1979; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1980; GFX10: atomicrmw.end: 1981; GFX10-NEXT: ret void 1982; 1983; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1984; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1985; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1986; GFX11-NEXT: ret void 1987; 1988; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1989; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1990; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1991; GFX12-NEXT: ret void 1992; 1993 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1994 ret void 1995} 1996 1997define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 { 1998; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1999; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2000; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2001; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2002; GFX803: atomicrmw.start: 2003; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2004; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2005; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2006; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2007; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2008; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2009; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2010; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2011; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2012; GFX803: atomicrmw.end: 2013; GFX803-NEXT: ret void 2014; 2015; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2016; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2017; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2018; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2019; GFX906: atomicrmw.start: 2020; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2021; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2022; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2023; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2024; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2025; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2026; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2027; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2028; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2029; GFX906: atomicrmw.end: 2030; GFX906-NEXT: ret void 2031; 2032; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2033; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2034; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2035; GFX908-NEXT: ret void 2036; 2037; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2038; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2039; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2040; GFX90A-NEXT: ret void 2041; 2042; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2043; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2044; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2045; GFX940-NEXT: ret void 2046; 2047; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2048; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2049; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2050; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2051; GFX10: atomicrmw.start: 2052; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2053; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2054; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2055; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2056; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2057; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2058; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2059; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2060; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2061; GFX10: atomicrmw.end: 2062; GFX10-NEXT: ret void 2063; 2064; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2065; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2066; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2067; GFX11-NEXT: ret void 2068; 2069; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2070; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2071; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2072; GFX12-NEXT: ret void 2073; 2074 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2075 ret void 2076} 2077 2078define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 2079; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2080; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2081; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2082; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2083; GFX803: atomicrmw.start: 2084; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2085; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2086; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2087; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2088; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2089; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2090; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2091; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2092; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2093; GFX803: atomicrmw.end: 2094; GFX803-NEXT: ret void 2095; 2096; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2097; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2098; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2099; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2100; GFX906: atomicrmw.start: 2101; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2102; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2103; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2104; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2105; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2106; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2107; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2108; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2109; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2110; GFX906: atomicrmw.end: 2111; GFX906-NEXT: ret void 2112; 2113; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2114; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2115; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2116; GFX908-NEXT: ret void 2117; 2118; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2119; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2120; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2121; GFX90A-NEXT: ret void 2122; 2123; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2124; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2125; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2126; GFX940-NEXT: ret void 2127; 2128; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2129; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2130; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2131; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2132; GFX10: atomicrmw.start: 2133; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2134; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2135; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2136; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2137; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2138; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2139; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2140; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2141; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2142; GFX10: atomicrmw.end: 2143; GFX10-NEXT: ret void 2144; 2145; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2146; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2147; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2148; GFX11-NEXT: ret void 2149; 2150; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2151; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2152; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2153; GFX12-NEXT: ret void 2154; 2155 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2156 ret void 2157} 2158 2159;--------------------------------------------------------------------- 2160; atomicrmw fsub 2161;--------------------------------------------------------------------- 2162 2163define float @test_atomicrmw_fsub_f32_global_system(ptr addrspace(1) %ptr, float %value) { 2164; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system( 2165; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2166; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2167; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2168; COMMON: atomicrmw.start: 2169; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2170; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2171; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2172; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2173; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 2174; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2175; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2176; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2177; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2178; COMMON: atomicrmw.end: 2179; COMMON-NEXT: ret float [[RES]] 2180; 2181 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst 2182 ret float %res 2183} 2184 2185define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2186; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_fine_grained_memory( 2187; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2188; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2189; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2190; COMMON: atomicrmw.start: 2191; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2192; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2193; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2194; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2195; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2196; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2197; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2198; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2199; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2200; COMMON: atomicrmw.end: 2201; COMMON-NEXT: ret float [[RES]] 2202; 2203 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 2204 ret float %res 2205} 2206 2207define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2208; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_remote_memory( 2209; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2210; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2211; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2212; COMMON: atomicrmw.start: 2213; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2214; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2215; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2216; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2217; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2218; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2219; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2220; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2221; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2222; COMMON: atomicrmw.end: 2223; COMMON-NEXT: ret float [[RES]] 2224; 2225 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 2226 ret float %res 2227} 2228 2229define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2230; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2231; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2232; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2233; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2234; COMMON: atomicrmw.start: 2235; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2236; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2237; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2238; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2239; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2240; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2241; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2242; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2243; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2244; COMMON: atomicrmw.end: 2245; COMMON-NEXT: ret float [[RES]] 2246; 2247 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 2248 ret float %res 2249} 2250 2251define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 2252; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode( 2253; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2254; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2255; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2256; COMMON: atomicrmw.start: 2257; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2258; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2259; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2260; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2261; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 2262; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2263; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2264; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2265; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2266; COMMON: atomicrmw.end: 2267; COMMON-NEXT: ret float [[TMP5]] 2268; 2269 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 2270 ret float %res 2271} 2272 2273define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2274; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2275; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2276; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2277; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2278; COMMON: atomicrmw.start: 2279; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2280; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2281; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2282; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2283; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2284; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2285; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2286; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2287; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2288; COMMON: atomicrmw.end: 2289; COMMON-NEXT: ret float [[TMP5]] 2290; 2291 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 2292 ret float %res 2293} 2294 2295define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2296; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2297; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2298; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2299; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2300; COMMON: atomicrmw.start: 2301; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2302; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2303; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2304; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2305; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2306; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2307; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2308; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2309; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2310; COMMON: atomicrmw.end: 2311; COMMON-NEXT: ret float [[TMP5]] 2312; 2313 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2314 ret float %res 2315} 2316 2317define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2318; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2319; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2320; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2321; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2322; COMMON: atomicrmw.start: 2323; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2324; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2325; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2326; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2327; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2328; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2329; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2330; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2331; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2332; COMMON: atomicrmw.end: 2333; COMMON-NEXT: ret float [[TMP5]] 2334; 2335 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2336 ret float %res 2337} 2338 2339;--------------------------------------------------------------------- 2340; atomicrmw fmax 2341;--------------------------------------------------------------------- 2342 2343define float @test_atomicrmw_fmax_f32_global_system(ptr addrspace(1) %ptr, float %value) { 2344; COMMON-LABEL: define float @test_atomicrmw_fmax_f32_global_system( 2345; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2346; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2347; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2348; COMMON: atomicrmw.start: 2349; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2350; COMMON-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2351; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2352; COMMON-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2353; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 2354; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2355; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2356; COMMON-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2357; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2358; COMMON: atomicrmw.end: 2359; COMMON-NEXT: ret float [[TMP6]] 2360; 2361 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst 2362 ret float %res 2363} 2364 2365define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2366; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2367; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2368; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2369; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2370; GFX803: atomicrmw.start: 2371; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2372; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2373; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2374; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2375; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2376; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2377; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2378; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2379; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2380; GFX803: atomicrmw.end: 2381; GFX803-NEXT: ret float [[TMP6]] 2382; 2383; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2384; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2385; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2386; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2387; GFX906: atomicrmw.start: 2388; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2389; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2390; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2391; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2392; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2393; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2394; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2395; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2396; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2397; GFX906: atomicrmw.end: 2398; GFX906-NEXT: ret float [[TMP6]] 2399; 2400; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2401; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2402; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2403; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2404; GFX908: atomicrmw.start: 2405; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2406; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2407; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2408; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2409; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2410; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2411; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2412; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2413; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2414; GFX908: atomicrmw.end: 2415; GFX908-NEXT: ret float [[TMP6]] 2416; 2417; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2418; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2419; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2420; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2421; GFX90A: atomicrmw.start: 2422; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2423; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2424; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2425; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2426; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2427; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2428; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2429; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2430; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2431; GFX90A: atomicrmw.end: 2432; GFX90A-NEXT: ret float [[TMP6]] 2433; 2434; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2435; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2436; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2437; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2438; GFX940: atomicrmw.start: 2439; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2440; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2441; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2442; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2443; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2444; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2445; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2446; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2447; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2448; GFX940: atomicrmw.end: 2449; GFX940-NEXT: ret float [[TMP6]] 2450; 2451; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2452; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2453; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2454; GFX10-NEXT: ret float [[RES]] 2455; 2456; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2457; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2458; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2459; GFX11-NEXT: ret float [[RES]] 2460; 2461; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( 2462; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2463; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2464; GFX12-NEXT: ret float [[RES]] 2465; 2466 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 2467 ret float %res 2468} 2469 2470define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2471; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2472; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2473; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2474; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2475; GFX803: atomicrmw.start: 2476; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2477; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2478; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2479; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2480; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2481; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2482; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2483; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2484; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2485; GFX803: atomicrmw.end: 2486; GFX803-NEXT: ret float [[TMP6]] 2487; 2488; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2489; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2490; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2491; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2492; GFX906: atomicrmw.start: 2493; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2494; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2495; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2496; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2497; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2498; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2499; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2500; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2501; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2502; GFX906: atomicrmw.end: 2503; GFX906-NEXT: ret float [[TMP6]] 2504; 2505; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2506; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2507; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2508; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2509; GFX908: atomicrmw.start: 2510; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2511; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2512; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2513; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2514; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2515; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2516; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2517; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2518; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2519; GFX908: atomicrmw.end: 2520; GFX908-NEXT: ret float [[TMP6]] 2521; 2522; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2523; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2524; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2525; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2526; GFX90A: atomicrmw.start: 2527; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2528; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2529; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2530; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2531; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2532; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2533; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2534; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2535; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2536; GFX90A: atomicrmw.end: 2537; GFX90A-NEXT: ret float [[TMP6]] 2538; 2539; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2540; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2541; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2542; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2543; GFX940: atomicrmw.start: 2544; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2545; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2546; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2547; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2548; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2549; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2550; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2551; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2552; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2553; GFX940: atomicrmw.end: 2554; GFX940-NEXT: ret float [[TMP6]] 2555; 2556; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2557; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2558; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2559; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2560; GFX10: atomicrmw.start: 2561; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2562; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2563; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2564; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2565; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2566; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2567; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2568; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2569; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2570; GFX10: atomicrmw.end: 2571; GFX10-NEXT: ret float [[RES]] 2572; 2573; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2574; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2575; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2576; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 2577; GFX11: atomicrmw.start: 2578; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2579; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2580; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2581; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2582; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2583; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2584; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2585; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2586; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2587; GFX11: atomicrmw.end: 2588; GFX11-NEXT: ret float [[RES]] 2589; 2590; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( 2591; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2592; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2593; GFX12-NEXT: ret float [[RES]] 2594; 2595 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 2596 ret float %res 2597} 2598 2599define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2600; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2601; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2602; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2603; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2604; GFX803: atomicrmw.start: 2605; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2606; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2607; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2608; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2609; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2610; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2611; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2612; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2613; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2614; GFX803: atomicrmw.end: 2615; GFX803-NEXT: ret float [[TMP6]] 2616; 2617; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2618; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2619; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2620; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2621; GFX906: atomicrmw.start: 2622; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2623; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2624; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2625; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2626; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2627; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2628; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2629; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2630; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2631; GFX906: atomicrmw.end: 2632; GFX906-NEXT: ret float [[TMP6]] 2633; 2634; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2635; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2636; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2637; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2638; GFX908: atomicrmw.start: 2639; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2640; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2641; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2642; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2643; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2644; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2645; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2646; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2647; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2648; GFX908: atomicrmw.end: 2649; GFX908-NEXT: ret float [[TMP6]] 2650; 2651; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2652; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2653; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2654; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2655; GFX90A: atomicrmw.start: 2656; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2657; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2658; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2659; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2660; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2661; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2662; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2663; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2664; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2665; GFX90A: atomicrmw.end: 2666; GFX90A-NEXT: ret float [[TMP6]] 2667; 2668; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2669; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2670; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2671; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2672; GFX940: atomicrmw.start: 2673; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2674; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2675; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2676; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2677; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2678; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2679; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2680; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2681; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2682; GFX940: atomicrmw.end: 2683; GFX940-NEXT: ret float [[TMP6]] 2684; 2685; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2686; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2687; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2688; GFX10-NEXT: ret float [[RES]] 2689; 2690; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2691; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2692; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2693; GFX11-NEXT: ret float [[RES]] 2694; 2695; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2696; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2697; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2698; GFX12-NEXT: ret float [[RES]] 2699; 2700 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 2701 ret float %res 2702} 2703 2704define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 2705; COMMON-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode( 2706; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2707; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2708; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2709; COMMON: atomicrmw.start: 2710; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2711; COMMON-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2712; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2713; COMMON-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2714; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 2715; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2716; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2717; COMMON-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2718; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2719; COMMON: atomicrmw.end: 2720; COMMON-NEXT: ret float [[TMP6]] 2721; 2722 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 2723 ret float %res 2724} 2725 2726define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2727; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2728; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2729; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2730; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2731; GFX803: atomicrmw.start: 2732; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2733; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2734; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2735; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2736; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2737; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2738; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2739; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2740; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2741; GFX803: atomicrmw.end: 2742; GFX803-NEXT: ret float [[TMP6]] 2743; 2744; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2745; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2746; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2747; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2748; GFX906: atomicrmw.start: 2749; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2750; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2751; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2752; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2753; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2754; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2755; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2756; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2757; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2758; GFX906: atomicrmw.end: 2759; GFX906-NEXT: ret float [[TMP6]] 2760; 2761; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2762; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2763; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2764; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2765; GFX908: atomicrmw.start: 2766; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2767; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2768; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2769; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2770; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2771; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2772; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2773; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2774; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2775; GFX908: atomicrmw.end: 2776; GFX908-NEXT: ret float [[TMP6]] 2777; 2778; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2779; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2780; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2781; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2782; GFX90A: atomicrmw.start: 2783; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2784; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2785; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2786; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2787; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2788; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2789; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2790; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2791; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2792; GFX90A: atomicrmw.end: 2793; GFX90A-NEXT: ret float [[TMP6]] 2794; 2795; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2796; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2797; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2798; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2799; GFX940: atomicrmw.start: 2800; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2801; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2802; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2803; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2804; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2805; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2806; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2807; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2808; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2809; GFX940: atomicrmw.end: 2810; GFX940-NEXT: ret float [[TMP6]] 2811; 2812; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2813; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2814; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2815; GFX10-NEXT: ret float [[RES]] 2816; 2817; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2818; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2819; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2820; GFX11-NEXT: ret float [[RES]] 2821; 2822; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2823; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2824; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2825; GFX12-NEXT: ret float [[RES]] 2826; 2827 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 2828 ret float %res 2829} 2830 2831define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2832; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2833; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2834; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2835; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2836; GFX803: atomicrmw.start: 2837; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2838; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2839; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2840; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2841; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2842; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2843; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2844; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2845; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2846; GFX803: atomicrmw.end: 2847; GFX803-NEXT: ret float [[TMP6]] 2848; 2849; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2850; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2851; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2852; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2853; GFX906: atomicrmw.start: 2854; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2855; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2856; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2857; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2858; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2859; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2860; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2861; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2862; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2863; GFX906: atomicrmw.end: 2864; GFX906-NEXT: ret float [[TMP6]] 2865; 2866; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2867; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2868; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2869; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2870; GFX908: atomicrmw.start: 2871; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2872; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2873; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2874; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2875; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2876; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2877; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2878; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2879; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2880; GFX908: atomicrmw.end: 2881; GFX908-NEXT: ret float [[TMP6]] 2882; 2883; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2884; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2885; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2886; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2887; GFX90A: atomicrmw.start: 2888; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2889; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2890; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2891; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2892; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2893; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2894; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2895; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2896; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2897; GFX90A: atomicrmw.end: 2898; GFX90A-NEXT: ret float [[TMP6]] 2899; 2900; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2901; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2902; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2903; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2904; GFX940: atomicrmw.start: 2905; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2906; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2907; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2908; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2909; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2910; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2911; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2912; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2913; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2914; GFX940: atomicrmw.end: 2915; GFX940-NEXT: ret float [[TMP6]] 2916; 2917; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2918; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2919; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2920; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2921; GFX10: atomicrmw.start: 2922; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2923; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2924; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2925; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2926; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2927; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2928; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2929; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2930; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2931; GFX10: atomicrmw.end: 2932; GFX10-NEXT: ret float [[RES]] 2933; 2934; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2935; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2936; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2937; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 2938; GFX11: atomicrmw.start: 2939; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2940; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2941; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2942; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2943; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2944; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2945; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2946; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2947; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2948; GFX11: atomicrmw.end: 2949; GFX11-NEXT: ret float [[RES]] 2950; 2951; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2952; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2953; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2954; GFX12-NEXT: ret float [[RES]] 2955; 2956 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2957 ret float %res 2958} 2959 2960define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2961; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2962; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2963; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2964; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2965; GFX803: atomicrmw.start: 2966; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2967; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2968; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2969; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2970; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2971; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2972; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2973; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2974; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2975; GFX803: atomicrmw.end: 2976; GFX803-NEXT: ret float [[TMP6]] 2977; 2978; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2979; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2980; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2981; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2982; GFX906: atomicrmw.start: 2983; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2984; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2985; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2986; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2987; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2988; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2989; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2990; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2991; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2992; GFX906: atomicrmw.end: 2993; GFX906-NEXT: ret float [[TMP6]] 2994; 2995; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2996; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2997; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2998; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2999; GFX908: atomicrmw.start: 3000; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3001; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3002; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3003; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3004; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3005; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3006; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3007; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3008; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3009; GFX908: atomicrmw.end: 3010; GFX908-NEXT: ret float [[TMP6]] 3011; 3012; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3013; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3014; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3015; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3016; GFX90A: atomicrmw.start: 3017; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3018; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3019; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3020; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3021; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3022; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3023; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3024; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3025; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3026; GFX90A: atomicrmw.end: 3027; GFX90A-NEXT: ret float [[TMP6]] 3028; 3029; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3030; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3031; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3032; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3033; GFX940: atomicrmw.start: 3034; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3035; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3036; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3037; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3038; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3039; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3040; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3041; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3042; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3043; GFX940: atomicrmw.end: 3044; GFX940-NEXT: ret float [[TMP6]] 3045; 3046; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3047; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3048; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3049; GFX10-NEXT: ret float [[RES]] 3050; 3051; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3052; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3053; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3054; GFX11-NEXT: ret float [[RES]] 3055; 3056; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3057; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3058; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3059; GFX12-NEXT: ret float [[RES]] 3060; 3061 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 3062 ret float %res 3063} 3064 3065;--------------------------------------------------------------------- 3066; atomicrmw fmin 3067;--------------------------------------------------------------------- 3068 3069define float @test_atomicrmw_fmin_f32_global_system(ptr addrspace(1) %ptr, float %value) { 3070; COMMON-LABEL: define float @test_atomicrmw_fmin_f32_global_system( 3071; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3072; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3073; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 3074; COMMON: atomicrmw.start: 3075; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3076; COMMON-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3077; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3078; COMMON-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3079; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 3080; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3081; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3082; COMMON-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3083; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3084; COMMON: atomicrmw.end: 3085; COMMON-NEXT: ret float [[TMP6]] 3086; 3087 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst 3088 ret float %res 3089} 3090 3091define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 3092; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3093; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3094; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3095; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3096; GFX803: atomicrmw.start: 3097; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3098; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3099; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3100; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3101; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3102; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3103; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3104; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3105; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3106; GFX803: atomicrmw.end: 3107; GFX803-NEXT: ret float [[TMP6]] 3108; 3109; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3110; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3111; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3112; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3113; GFX906: atomicrmw.start: 3114; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3115; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3116; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3117; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3118; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3119; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3120; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3121; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3122; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3123; GFX906: atomicrmw.end: 3124; GFX906-NEXT: ret float [[TMP6]] 3125; 3126; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3127; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3128; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3129; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3130; GFX908: atomicrmw.start: 3131; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3132; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3133; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3134; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3135; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3136; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3137; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3138; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3139; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3140; GFX908: atomicrmw.end: 3141; GFX908-NEXT: ret float [[TMP6]] 3142; 3143; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3144; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3145; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3146; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3147; GFX90A: atomicrmw.start: 3148; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3149; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3150; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3151; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3152; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3153; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3154; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3155; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3156; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3157; GFX90A: atomicrmw.end: 3158; GFX90A-NEXT: ret float [[TMP6]] 3159; 3160; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3161; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3162; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3163; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3164; GFX940: atomicrmw.start: 3165; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3166; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3167; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3168; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3169; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3170; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3171; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3172; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3173; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3174; GFX940: atomicrmw.end: 3175; GFX940-NEXT: ret float [[TMP6]] 3176; 3177; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3178; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3179; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3180; GFX10-NEXT: ret float [[RES]] 3181; 3182; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3183; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3184; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3185; GFX11-NEXT: ret float [[RES]] 3186; 3187; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( 3188; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3189; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3190; GFX12-NEXT: ret float [[RES]] 3191; 3192 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0 3193 ret float %res 3194} 3195 3196define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3197; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3198; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3199; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3200; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3201; GFX803: atomicrmw.start: 3202; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3203; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3204; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3205; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3206; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3207; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3208; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3209; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3210; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3211; GFX803: atomicrmw.end: 3212; GFX803-NEXT: ret float [[TMP6]] 3213; 3214; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3215; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3216; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3217; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3218; GFX906: atomicrmw.start: 3219; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3220; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3221; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3222; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3223; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3224; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3225; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3226; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3227; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3228; GFX906: atomicrmw.end: 3229; GFX906-NEXT: ret float [[TMP6]] 3230; 3231; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3232; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3233; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3234; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3235; GFX908: atomicrmw.start: 3236; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3237; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3238; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3239; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3240; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3241; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3242; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3243; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3244; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3245; GFX908: atomicrmw.end: 3246; GFX908-NEXT: ret float [[TMP6]] 3247; 3248; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3249; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3250; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3251; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3252; GFX90A: atomicrmw.start: 3253; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3254; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3255; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3256; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3257; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3258; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3259; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3260; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3261; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3262; GFX90A: atomicrmw.end: 3263; GFX90A-NEXT: ret float [[TMP6]] 3264; 3265; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3266; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3267; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3268; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3269; GFX940: atomicrmw.start: 3270; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3271; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3272; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3273; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3274; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3275; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3276; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3277; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3278; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3279; GFX940: atomicrmw.end: 3280; GFX940-NEXT: ret float [[TMP6]] 3281; 3282; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3283; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3284; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3285; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 3286; GFX10: atomicrmw.start: 3287; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3288; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3289; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3290; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3291; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3292; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3293; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3294; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3295; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3296; GFX10: atomicrmw.end: 3297; GFX10-NEXT: ret float [[RES]] 3298; 3299; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3300; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3301; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3302; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 3303; GFX11: atomicrmw.start: 3304; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3305; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3306; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3307; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3308; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3309; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3310; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3311; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3312; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3313; GFX11: atomicrmw.end: 3314; GFX11-NEXT: ret float [[RES]] 3315; 3316; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( 3317; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3318; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3319; GFX12-NEXT: ret float [[RES]] 3320; 3321 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0 3322 ret float %res 3323} 3324 3325define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3326; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3327; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3328; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3329; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3330; GFX803: atomicrmw.start: 3331; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3332; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3333; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3334; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3335; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3336; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3337; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3338; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3339; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3340; GFX803: atomicrmw.end: 3341; GFX803-NEXT: ret float [[TMP6]] 3342; 3343; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3344; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3345; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3346; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3347; GFX906: atomicrmw.start: 3348; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3349; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3350; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3351; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3352; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3353; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3354; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3355; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3356; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3357; GFX906: atomicrmw.end: 3358; GFX906-NEXT: ret float [[TMP6]] 3359; 3360; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3361; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3362; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3363; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3364; GFX908: atomicrmw.start: 3365; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3366; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3367; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3368; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3369; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3370; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3371; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3372; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3373; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3374; GFX908: atomicrmw.end: 3375; GFX908-NEXT: ret float [[TMP6]] 3376; 3377; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3378; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3379; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3380; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3381; GFX90A: atomicrmw.start: 3382; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3383; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3384; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3385; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3386; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3387; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3388; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3389; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3390; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3391; GFX90A: atomicrmw.end: 3392; GFX90A-NEXT: ret float [[TMP6]] 3393; 3394; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3395; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3396; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3397; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3398; GFX940: atomicrmw.start: 3399; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3400; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3401; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3402; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3403; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3404; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3405; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3406; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3407; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3408; GFX940: atomicrmw.end: 3409; GFX940-NEXT: ret float [[TMP6]] 3410; 3411; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3412; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3413; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3414; GFX10-NEXT: ret float [[RES]] 3415; 3416; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3417; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3418; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3419; GFX11-NEXT: ret float [[RES]] 3420; 3421; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3422; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3423; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3424; GFX12-NEXT: ret float [[RES]] 3425; 3426 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 3427 ret float %res 3428} 3429 3430define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 3431; COMMON-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode( 3432; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3433; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3434; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 3435; COMMON: atomicrmw.start: 3436; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3437; COMMON-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3438; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3439; COMMON-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3440; COMMON-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 3441; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3442; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3443; COMMON-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3444; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3445; COMMON: atomicrmw.end: 3446; COMMON-NEXT: ret float [[TMP6]] 3447; 3448 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 3449 ret float %res 3450} 3451 3452define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 3453; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3454; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3455; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3456; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3457; GFX803: atomicrmw.start: 3458; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3459; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3460; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3461; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3462; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3463; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3464; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3465; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3466; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3467; GFX803: atomicrmw.end: 3468; GFX803-NEXT: ret float [[TMP6]] 3469; 3470; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3471; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3472; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3473; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3474; GFX906: atomicrmw.start: 3475; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3476; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3477; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3478; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3479; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3480; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3481; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3482; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3483; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3484; GFX906: atomicrmw.end: 3485; GFX906-NEXT: ret float [[TMP6]] 3486; 3487; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3488; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3489; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3490; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3491; GFX908: atomicrmw.start: 3492; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3493; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3494; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3495; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3496; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3497; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3498; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3499; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3500; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3501; GFX908: atomicrmw.end: 3502; GFX908-NEXT: ret float [[TMP6]] 3503; 3504; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3505; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3506; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3507; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3508; GFX90A: atomicrmw.start: 3509; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3510; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3511; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3512; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3513; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3514; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3515; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3516; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3517; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3518; GFX90A: atomicrmw.end: 3519; GFX90A-NEXT: ret float [[TMP6]] 3520; 3521; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3522; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3523; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3524; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3525; GFX940: atomicrmw.start: 3526; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3527; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3528; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3529; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3530; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3531; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3532; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3533; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3534; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3535; GFX940: atomicrmw.end: 3536; GFX940-NEXT: ret float [[TMP6]] 3537; 3538; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3539; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3540; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3541; GFX10-NEXT: ret float [[RES]] 3542; 3543; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3544; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3545; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3546; GFX11-NEXT: ret float [[RES]] 3547; 3548; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 3549; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3550; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3551; GFX12-NEXT: ret float [[RES]] 3552; 3553 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 3554 ret float %res 3555} 3556 3557define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3558; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3559; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3560; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3561; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3562; GFX803: atomicrmw.start: 3563; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3564; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3565; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3566; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3567; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3568; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3569; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3570; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3571; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3572; GFX803: atomicrmw.end: 3573; GFX803-NEXT: ret float [[TMP6]] 3574; 3575; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3576; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3577; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3578; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3579; GFX906: atomicrmw.start: 3580; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3581; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3582; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3583; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3584; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3585; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3586; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3587; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3588; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3589; GFX906: atomicrmw.end: 3590; GFX906-NEXT: ret float [[TMP6]] 3591; 3592; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3593; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3594; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3595; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3596; GFX908: atomicrmw.start: 3597; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3598; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3599; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3600; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3601; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3602; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3603; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3604; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3605; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3606; GFX908: atomicrmw.end: 3607; GFX908-NEXT: ret float [[TMP6]] 3608; 3609; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3610; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3611; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3612; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3613; GFX90A: atomicrmw.start: 3614; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3615; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3616; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3617; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3618; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3619; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3620; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3621; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3622; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3623; GFX90A: atomicrmw.end: 3624; GFX90A-NEXT: ret float [[TMP6]] 3625; 3626; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3627; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3628; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3629; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3630; GFX940: atomicrmw.start: 3631; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3632; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3633; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3634; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3635; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3636; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3637; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3638; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3639; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3640; GFX940: atomicrmw.end: 3641; GFX940-NEXT: ret float [[TMP6]] 3642; 3643; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3644; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3645; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3646; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 3647; GFX10: atomicrmw.start: 3648; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3649; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3650; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3651; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3652; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3653; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3654; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3655; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3656; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3657; GFX10: atomicrmw.end: 3658; GFX10-NEXT: ret float [[RES]] 3659; 3660; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3661; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3662; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3663; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 3664; GFX11: atomicrmw.start: 3665; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3666; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3667; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3668; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3669; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3670; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3671; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3672; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3673; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3674; GFX11: atomicrmw.end: 3675; GFX11-NEXT: ret float [[RES]] 3676; 3677; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 3678; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3679; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3680; GFX12-NEXT: ret float [[RES]] 3681; 3682 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 3683 ret float %res 3684} 3685 3686define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3687; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3688; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3689; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3690; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3691; GFX803: atomicrmw.start: 3692; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3693; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3694; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3695; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3696; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3697; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3698; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3699; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3700; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3701; GFX803: atomicrmw.end: 3702; GFX803-NEXT: ret float [[TMP6]] 3703; 3704; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3705; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3706; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3707; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3708; GFX906: atomicrmw.start: 3709; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3710; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3711; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3712; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3713; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3714; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3715; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3716; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3717; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3718; GFX906: atomicrmw.end: 3719; GFX906-NEXT: ret float [[TMP6]] 3720; 3721; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3722; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3723; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3724; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3725; GFX908: atomicrmw.start: 3726; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3727; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3728; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3729; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3730; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3731; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3732; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3733; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3734; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3735; GFX908: atomicrmw.end: 3736; GFX908-NEXT: ret float [[TMP6]] 3737; 3738; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3739; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3740; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3741; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3742; GFX90A: atomicrmw.start: 3743; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3744; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3745; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3746; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3747; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3748; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3749; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3750; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3751; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3752; GFX90A: atomicrmw.end: 3753; GFX90A-NEXT: ret float [[TMP6]] 3754; 3755; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3756; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3757; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3758; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3759; GFX940: atomicrmw.start: 3760; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3761; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3762; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3763; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3764; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3765; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3766; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3767; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3768; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3769; GFX940: atomicrmw.end: 3770; GFX940-NEXT: ret float [[TMP6]] 3771; 3772; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3773; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3774; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3775; GFX10-NEXT: ret float [[RES]] 3776; 3777; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3778; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3779; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3780; GFX11-NEXT: ret float [[RES]] 3781; 3782; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3783; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3784; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3785; GFX12-NEXT: ret float [[RES]] 3786; 3787 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 3788 ret float %res 3789} 3790 3791attributes #0 = { "denormal-fp-mode-f32"="preserve-sign,preserve-sign" } 3792attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" } 3793 3794!0 = !{} 3795;. 3796; GFX803: [[META0]] = !{} 3797;. 3798; GFX906: [[META0]] = !{} 3799;. 3800; GFX908: [[META0]] = !{} 3801;. 3802; GFX90A: [[META0]] = !{} 3803;. 3804; GFX940: [[META0]] = !{} 3805;. 3806; GFX10: [[META0]] = !{} 3807;. 3808; GFX11: [[META0]] = !{} 3809;. 3810; GFX12: [[META0]] = !{} 3811;. 3812