; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX803 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s

; Test matrix: each invocation above runs opt with -passes=atomic-expand for
; one -mcpu value and checks the output against the shared COMMON prefix plus
; one subtarget-specific prefix. Functions whose expected output is the same
; for every subtarget are checked through the COMMON prefix only.

;---------------------------------------------------------------------
; atomicrmw xchg
;---------------------------------------------------------------------

; xchg is supported over PCIe, so no expansion is necessary
;
; Input: a seq_cst float xchg on an addrspace(1) pointer at syncscope("agent")
; with no metadata attached. The checks expect the atomicrmw to be left as a
; single instruction for every -mcpu in the matrix; the only difference in the
; expected output is the explicit "align 4".
define float @test_atomicrmw_xchg_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
; COMMON-NEXT: ret float [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
  ret float %res
}

; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
; Input: the seq_cst float xchg at syncscope("agent") annotated with
; !amdgpu.no.fine.grained.memory. The checks expect the instruction to come
; out of atomic-expand unexpanded and still carrying that annotation, for
; every -mcpu in the matrix.
define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
; COMMON-NEXT: ret float [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
  ret float %res
}

; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
;
; Input: the seq_cst float xchg at syncscope("agent") annotated with
; !amdgpu.no.remote.memory. The checks expect the instruction to stay
; unexpanded and keep that annotation, for every -mcpu in the matrix.
define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; COMMON-NEXT: ret float [[RES]]
;
  %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
  ret float %res
}

; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored.
49define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 50; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 51; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 52; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 53; COMMON-NEXT: ret float [[RES]] 54; 55 %res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 56 ret float %res 57} 58 59;--------------------------------------------------------------------- 60; atomicrmw fadd 61;--------------------------------------------------------------------- 62 63define float @test_atomicrmw_fadd_f32_global_agent(ptr addrspace(1) %ptr, float %value) { 64; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 65; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 66; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 67; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 68; GFX803: atomicrmw.start: 69; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 70; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 71; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 72; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 73; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 74; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 75; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 76; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 77; 
GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 78; GFX803: atomicrmw.end: 79; GFX803-NEXT: ret float [[TMP5]] 80; 81; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 82; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 83; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 84; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 85; GFX906: atomicrmw.start: 86; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 87; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 88; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 89; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 90; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 91; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 92; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 93; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 94; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 95; GFX906: atomicrmw.end: 96; GFX906-NEXT: ret float [[TMP5]] 97; 98; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 99; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 100; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 101; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 102; GFX908: atomicrmw.start: 103; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 104; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 105; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 106; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 107; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst 
seq_cst, align 4 108; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 109; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 110; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 111; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 112; GFX908: atomicrmw.end: 113; GFX908-NEXT: ret float [[TMP5]] 114; 115; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 116; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 117; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 118; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 119; GFX90A: atomicrmw.start: 120; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 121; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 122; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 123; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 124; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 125; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 126; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 127; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 128; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 129; GFX90A: atomicrmw.end: 130; GFX90A-NEXT: ret float [[TMP5]] 131; 132; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 133; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 134; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 135; GFX940-NEXT: ret float [[RES]] 136; 137; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 138; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 139; 
GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 140; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 141; GFX10: atomicrmw.start: 142; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 143; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 144; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 145; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 146; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 147; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 148; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 149; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 150; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 151; GFX10: atomicrmw.end: 152; GFX10-NEXT: ret float [[TMP5]] 153; 154; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 155; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 156; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 157; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 158; GFX11: atomicrmw.start: 159; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 160; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 161; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 162; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 163; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 164; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 165; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 166; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 167; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 
168; GFX11: atomicrmw.end: 169; GFX11-NEXT: ret float [[TMP5]] 170; 171; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( 172; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 173; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 174; GFX12-NEXT: ret float [[TMP5]] 175; 176 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst 177 ret float %res 178} 179 180define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 181; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 182; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 183; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 184; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 185; GFX803: atomicrmw.start: 186; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 187; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 188; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 189; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 190; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 191; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 192; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 193; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 194; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 195; GFX803: atomicrmw.end: 196; GFX803-NEXT: ret float [[TMP5]] 197; 198; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 199; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) 
#[[ATTR0]] { 200; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 201; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 202; GFX906: atomicrmw.start: 203; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 204; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 205; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 206; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 207; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 208; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 209; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 210; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 211; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 212; GFX906: atomicrmw.end: 213; GFX906-NEXT: ret float [[TMP5]] 214; 215; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 216; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 217; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 218; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 219; GFX908: atomicrmw.start: 220; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 221; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 222; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 223; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 224; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 225; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 226; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } 
[[TMP4]], 0 227; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 228; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 229; GFX908: atomicrmw.end: 230; GFX908-NEXT: ret float [[TMP5]] 231; 232; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 233; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 234; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 235; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 236; GFX90A: atomicrmw.start: 237; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 238; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 239; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 240; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 241; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 242; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 243; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 244; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 245; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 246; GFX90A: atomicrmw.end: 247; GFX90A-NEXT: ret float [[TMP5]] 248; 249; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 250; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 251; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 252; GFX940-NEXT: ret float [[RES]] 253; 254; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 255; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) 
#[[ATTR0]] { 256; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 257; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 258; GFX10: atomicrmw.start: 259; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 260; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 261; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 262; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 263; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 264; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 265; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 266; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 267; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 268; GFX10: atomicrmw.end: 269; GFX10-NEXT: ret float [[TMP5]] 270; 271; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 272; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 273; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 274; GFX11-NEXT: ret float [[TMP5]] 275; 276; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( 277; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 278; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 279; GFX12-NEXT: ret float [[TMP5]] 280; 281 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 282 ret float %res 283} 284 285define float 
@test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 286; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 287; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 288; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 289; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 290; GFX803: atomicrmw.start: 291; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 292; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 293; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 294; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 295; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 296; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 297; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 298; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 299; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 300; GFX803: atomicrmw.end: 301; GFX803-NEXT: ret float [[TMP5]] 302; 303; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 304; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 305; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 306; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 307; GFX906: atomicrmw.start: 308; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 309; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 310; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 311; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 312; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], 
i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 313; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 314; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 315; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 316; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 317; GFX906: atomicrmw.end: 318; GFX906-NEXT: ret float [[TMP5]] 319; 320; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 321; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 322; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 323; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 324; GFX908: atomicrmw.start: 325; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 326; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 327; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 328; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 329; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 330; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 331; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 332; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 333; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 334; GFX908: atomicrmw.end: 335; GFX908-NEXT: ret float [[TMP5]] 336; 337; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 338; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 339; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 340; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 341; GFX90A: 
atomicrmw.start: 342; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 343; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 344; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 345; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 346; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 347; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 348; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 349; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 350; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 351; GFX90A: atomicrmw.end: 352; GFX90A-NEXT: ret float [[TMP5]] 353; 354; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 355; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 356; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 357; GFX940-NEXT: ret float [[RES]] 358; 359; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 360; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 361; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 362; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 363; GFX10: atomicrmw.start: 364; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 365; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 366; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 367; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 368; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst 
seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 369; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 370; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 371; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 372; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 373; GFX10: atomicrmw.end: 374; GFX10-NEXT: ret float [[TMP5]] 375; 376; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 377; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 378; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 379; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 380; GFX11: atomicrmw.start: 381; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 382; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 383; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 384; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 385; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 386; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 387; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 388; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 389; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 390; GFX11: atomicrmw.end: 391; GFX11-NEXT: ret float [[TMP5]] 392; 393; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( 394; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 395; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 396; GFX12-NEXT: ret float [[TMP5]] 397; 398 %res = atomicrmw fadd 
ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 399 ret float %res 400} 401 402define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 403; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 404; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 405; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 406; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 407; GFX803: atomicrmw.start: 408; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 409; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 410; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 411; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 412; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 413; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 414; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 415; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 416; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 417; GFX803: atomicrmw.end: 418; GFX803-NEXT: ret float [[TMP5]] 419; 420; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 421; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 422; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 423; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 424; GFX906: atomicrmw.start: 425; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], 
[[ATOMICRMW_START]] ] 426; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 427; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 428; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 429; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 430; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 431; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 432; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 433; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 434; GFX906: atomicrmw.end: 435; GFX906-NEXT: ret float [[TMP5]] 436; 437; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 438; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 439; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 440; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 441; GFX908: atomicrmw.start: 442; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 443; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 444; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 445; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 446; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 447; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 448; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 449; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 450; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 
451; GFX908: atomicrmw.end: 452; GFX908-NEXT: ret float [[TMP5]] 453; 454; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 455; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 456; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 457; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 458; GFX90A: atomicrmw.start: 459; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 460; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 461; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 462; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 463; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 464; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 465; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 466; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 467; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 468; GFX90A: atomicrmw.end: 469; GFX90A-NEXT: ret float [[TMP5]] 470; 471; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 472; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 473; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 474; GFX940-NEXT: ret float [[RES]] 475; 476; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 477; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 478; 
GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 479; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 480; GFX10: atomicrmw.start: 481; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 482; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 483; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 484; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 485; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 486; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 487; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 488; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 489; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 490; GFX10: atomicrmw.end: 491; GFX10-NEXT: ret float [[TMP5]] 492; 493; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 494; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 495; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 496; GFX11-NEXT: ret float [[TMP5]] 497; 498; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 499; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 500; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 501; GFX12-NEXT: ret float [[TMP5]] 502; 503 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value 
syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret float %res
}

; fadd f32 at agent scope with both !amdgpu.no.fine.grained.memory and
; !amdgpu.no.remote.memory, in a function that uses attribute group #0.
; NOTE(review): per the test name, #0 selects the f32 DAZ denormal mode; the
; attribute definition is not visible in this part of the file -- confirm.
; Per the checks below: gfx803, gfx906, gfx908, gfx90a and gfx1030 expand to a
; cmpxchg loop that carries both metadata kinds, while gfx940, gfx1100 and
; gfx1200 keep the atomicrmw unchanged.
define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 {
; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP5]]
;
; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP5]]
;
; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX908-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX908-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP5]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX90A-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX90A-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP5]]
;
; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX940-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX940-NEXT:    ret float [[RES]]
;
; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX10-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX10:       atomicrmw.start:
; GFX10-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10:       atomicrmw.end:
; GFX10-NEXT:    ret float [[TMP5]]
;
; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX11-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT:    ret float [[TMP5]]
;
; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX12-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX12-NEXT:    ret float [[TMP5]]
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret float %res
}

; Same operation and metadata as the previous test, but the function uses
; attribute group #1.
; NOTE(review): per the test name, #1 selects the dynamic f32 denormal mode;
; the attribute definition is not visible in this part of the file -- confirm.
; Per the checks below: gfx803, gfx906, gfx908, gfx90a and gfx1030 expand to a
; cmpxchg loop, while gfx940, gfx1100 and gfx1200 keep the atomicrmw unchanged.
define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP5]]
;
; GFX906-LABEL: define float
@test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 631; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 632; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 633; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 634; GFX906: atomicrmw.start: 635; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 636; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 637; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 638; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 639; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 640; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 641; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 642; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 643; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 644; GFX906: atomicrmw.end: 645; GFX906-NEXT: ret float [[TMP5]] 646; 647; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 648; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 649; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 650; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 651; GFX908: atomicrmw.start: 652; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 653; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 654; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 655; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 656; GFX908-NEXT: 
[[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 657; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 658; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 659; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 660; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 661; GFX908: atomicrmw.end: 662; GFX908-NEXT: ret float [[TMP5]] 663; 664; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 665; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 666; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 667; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 668; GFX90A: atomicrmw.start: 669; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 670; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 671; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 672; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 673; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 674; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 675; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 676; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 677; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 678; GFX90A: atomicrmw.end: 679; GFX90A-NEXT: ret float [[TMP5]] 680; 681; GFX940-LABEL: define float 
@test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 682; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 683; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 684; GFX940-NEXT: ret float [[RES]] 685; 686; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 687; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 688; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 689; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 690; GFX10: atomicrmw.start: 691; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 692; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 693; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 694; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 695; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 696; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 697; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 698; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 699; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 700; GFX10: atomicrmw.end: 701; GFX10-NEXT: ret float [[TMP5]] 702; 703; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( 704; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { 705; 
GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT:    ret float [[TMP5]]
;
; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] {
; GFX12-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX12-NEXT:    ret float [[TMP5]]
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret float %res
}

; fadd f32 at agent scope with only !amdgpu.ignore.denormal.mode attached.
; Per the checks below: gfx940 and gfx1200 keep the atomicrmw with its
; metadata; every other target, including gfx1100, expands to a cmpxchg loop
; whose cmpxchg carries no metadata.
define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP5]]
;
; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP5]]
;
; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX908-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX908-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP5]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX90A-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX90A-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP5]]
;
; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
; GFX940-NEXT:    ret float [[RES]]
;
; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX10:       atomicrmw.start:
; GFX10-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX10-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10:       atomicrmw.end:
; GFX10-NEXT:    ret float [[TMP5]]
;
; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX11-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX11:       atomicrmw.start:
; GFX11-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX11-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX11-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX11-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX11-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX11-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX11-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11:       atomicrmw.end:
; GFX11-NEXT:    ret float [[TMP5]]
;
; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[TMP5]]
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

; fadd f32 at agent scope with !amdgpu.no.fine.grained.memory combined with
; !amdgpu.ignore.denormal.mode. Per the checks below: gfx90a, gfx940, gfx1100
; and gfx1200 keep the atomicrmw with both metadata kinds; gfx803, gfx906,
; gfx908 and gfx1030 expand to a cmpxchg loop whose cmpxchg keeps only
; !amdgpu.no.fine.grained.memory.
define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT:    [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
;
GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 847; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 848; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 849; GFX803: atomicrmw.end: 850; GFX803-NEXT: ret float [[TMP5]] 851; 852; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 853; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 854; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 855; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 856; GFX906: atomicrmw.start: 857; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 858; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 859; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 860; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 861; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 862; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 863; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 864; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 865; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 866; GFX906: atomicrmw.end: 867; GFX906-NEXT: ret float [[TMP5]] 868; 869; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 870; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 871; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 872; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 873; GFX908: atomicrmw.start: 874; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], 
[[ATOMICRMW_START]] ] 875; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 876; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 877; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 878; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 879; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 880; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 881; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 882; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 883; GFX908: atomicrmw.end: 884; GFX908-NEXT: ret float [[TMP5]] 885; 886; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 887; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 888; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 889; GFX90A-NEXT: ret float [[TMP5]] 890; 891; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 892; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 893; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 894; GFX940-NEXT: ret float [[RES]] 895; 896; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 897; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 898; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 899; GFX10-NEXT: 
br label [[ATOMICRMW_START:%.*]] 900; GFX10: atomicrmw.start: 901; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 902; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 903; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 904; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 905; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 906; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 907; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 908; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 909; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 910; GFX10: atomicrmw.end: 911; GFX10-NEXT: ret float [[TMP5]] 912; 913; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 914; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 915; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 916; GFX11-NEXT: ret float [[TMP5]] 917; 918; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 919; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 920; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 921; GFX12-NEXT: ret float [[TMP5]] 922; 923 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 924 ret 
float %res 925} 926
; fadd f32 at agent scope on a global (addrspace 1) pointer, tagged with
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode but not
; !amdgpu.no.fine.grained.memory. Per the checks below, only GFX940 and GFX12
; keep the atomicrmw; GFX803, GFX906, GFX908, GFX90A, GFX10 and GFX11 expand it
; to a cmpxchg loop that still carries !amdgpu.no.remote.memory.
927define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 928; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 929; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 930; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 931; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 932; GFX803: atomicrmw.start: 933; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 934; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 935; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 936; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 937; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 938; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 939; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 940; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 941; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 942; GFX803: atomicrmw.end: 943; GFX803-NEXT: ret float [[TMP5]] 944; 945; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 946; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 947; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 948; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 949; GFX906: atomicrmw.start: 950; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 951; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 952; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 953; 
GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 954; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 955; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 956; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 957; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 958; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 959; GFX906: atomicrmw.end: 960; GFX906-NEXT: ret float [[TMP5]] 961; 962; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 963; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 964; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 965; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 966; GFX908: atomicrmw.start: 967; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 968; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 969; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 970; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 971; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 972; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 973; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 974; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 975; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 976; GFX908: atomicrmw.end: 977; GFX908-NEXT: ret float [[TMP5]] 978; 979; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 980; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], 
float [[VALUE:%.*]]) #[[ATTR0]] { 981; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 982; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 983; GFX90A: atomicrmw.start: 984; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 985; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 986; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 987; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 988; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 989; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 990; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 991; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 992; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 993; GFX90A: atomicrmw.end: 994; GFX90A-NEXT: ret float [[TMP5]] 995; 996; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 997; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 998; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 999; GFX940-NEXT: ret float [[RES]] 1000; 1001; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1002; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1003; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1004; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1005; GFX10: atomicrmw.start: 1006; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1007; GFX10-NEXT: 
[[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1008; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1009; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1010; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1011; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1012; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1013; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1014; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1015; GFX10: atomicrmw.end: 1016; GFX10-NEXT: ret float [[TMP5]] 1017; 1018; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1019; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1020; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1021; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 1022; GFX11: atomicrmw.start: 1023; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1024; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1025; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1026; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1027; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1028; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1029; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1030; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1031; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1032; GFX11: atomicrmw.end: 1033; GFX11-NEXT: ret float [[TMP5]] 1034; 1035; GFX12-LABEL: define float 
@test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 1036; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1037; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1038; GFX12-NEXT: ret float [[TMP5]] 1039; 1040 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1041 ret float %res 1042} 1043
; fadd f32 at agent scope on a global (addrspace 1) pointer, tagged with all of
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode. Per the checks below, GFX90A, GFX940, GFX11 and
; GFX12 keep the atomicrmw; GFX803, GFX906, GFX908 and GFX10 expand it to a
; cmpxchg loop that keeps both memory-kind annotations.
1044define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1045; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1046; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1047; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1048; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1049; GFX803: atomicrmw.start: 1050; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1051; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1052; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1053; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1054; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1055; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1056; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1057; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1058; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], 
label [[ATOMICRMW_START]] 1059; GFX803: atomicrmw.end: 1060; GFX803-NEXT: ret float [[TMP5]] 1061; 1062; GFX906-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1063; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1064; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1065; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1066; GFX906: atomicrmw.start: 1067; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1068; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1069; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1070; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1071; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1072; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1073; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1074; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1075; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1076; GFX906: atomicrmw.end: 1077; GFX906-NEXT: ret float [[TMP5]] 1078; 1079; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1080; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1081; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1082; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1083; GFX908: atomicrmw.start: 1084; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1085; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1086; 
GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1087; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1088; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1089; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1090; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1091; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1092; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1093; GFX908: atomicrmw.end: 1094; GFX908-NEXT: ret float [[TMP5]] 1095; 1096; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1097; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1098; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1099; GFX90A-NEXT: ret float [[TMP5]] 1100; 1101; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1102; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1103; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1104; GFX940-NEXT: ret float [[RES]] 1105; 1106; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1107; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float 
[[VALUE:%.*]]) #[[ATTR0]] { 1108; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1109; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1110; GFX10: atomicrmw.start: 1111; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1112; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1113; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1114; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1115; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1116; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1117; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1118; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1119; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1120; GFX10: atomicrmw.end: 1121; GFX10-NEXT: ret float [[TMP5]] 1122; 1123; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1124; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1125; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1126; GFX11-NEXT: ret float [[TMP5]] 1127; 1128; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1129; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1130; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1131; GFX12-NEXT: ret float [[TMP5]] 1132; 1133 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1134 ret float %res 1135} 1136
; fadd f32 at agent scope on a global (addrspace 1) pointer with
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode, in a function that uses attribute group #0.
; NOTE(review): going by the test name, #0 presumably selects a DAZ denormal
; mode -- confirm against the #0 definition. Per the checks below, GFX90A,
; GFX940, GFX11 and GFX12 keep the atomicrmw; GFX803, GFX906, GFX908 and GFX10
; expand it to a cmpxchg loop.
1137define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 { 1138; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1139; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1140; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1141; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1142; GFX803: atomicrmw.start: 1143; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1144; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1145; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1146; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1147; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1148; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1149; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1150; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1151; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1152; GFX803: atomicrmw.end: 1153; GFX803-NEXT: ret float [[TMP5]] 1154; 1155; GFX906-LABEL: define float 
@test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1156; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1157; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1158; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1159; GFX906: atomicrmw.start: 1160; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1161; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1162; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1163; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1164; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1165; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1166; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1167; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1168; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1169; GFX906: atomicrmw.end: 1170; GFX906-NEXT: ret float [[TMP5]] 1171; 1172; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1173; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1174; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1175; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1176; GFX908: atomicrmw.start: 1177; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1178; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1179; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1180; GFX908-NEXT: [[TMP3:%.*]] = bitcast 
float [[LOADED]] to i32 1181; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1182; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1183; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1184; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1185; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1186; GFX908: atomicrmw.end: 1187; GFX908-NEXT: ret float [[TMP5]] 1188; 1189; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1190; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1191; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1192; GFX90A-NEXT: ret float [[TMP5]] 1193; 1194; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1195; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1196; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1197; GFX940-NEXT: ret float [[RES]] 1198; 1199; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1200; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1201; GFX10-NEXT: 
[[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1202; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1203; GFX10: atomicrmw.start: 1204; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1205; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1206; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1207; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1208; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1209; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1210; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1211; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1212; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1213; GFX10: atomicrmw.end: 1214; GFX10-NEXT: ret float [[TMP5]] 1215; 1216; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1217; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1218; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1219; GFX11-NEXT: ret float [[TMP5]] 1220; 1221; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 1222; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1223; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory 
[[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1224; GFX12-NEXT: ret float [[TMP5]] 1225; 1226 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1227 ret float %res 1228} 1229
; fadd f32 at agent scope on a global (addrspace 1) pointer with
; !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and
; !amdgpu.ignore.denormal.mode, in a function that uses attribute group #1.
; NOTE(review): going by the test name, #1 presumably selects a dynamic
; denormal mode -- confirm against the #1 definition. Per the checks below,
; GFX90A, GFX940, GFX11 and GFX12 keep the atomicrmw; GFX803, GFX906, GFX908
; and GFX10 expand it to a cmpxchg loop.
1230define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 1231; GFX803-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1232; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1233; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1234; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1235; GFX803: atomicrmw.start: 1236; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1237; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1238; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1239; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1240; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1241; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1242; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1243; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1244; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1245; GFX803: atomicrmw.end: 1246; GFX803-NEXT: ret float [[TMP5]] 1247; 1248; GFX906-LABEL: define float 
@test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1249; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1250; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1251; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1252; GFX906: atomicrmw.start: 1253; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1254; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1255; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1256; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1257; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1258; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1259; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1260; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1261; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1262; GFX906: atomicrmw.end: 1263; GFX906-NEXT: ret float [[TMP5]] 1264; 1265; GFX908-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1266; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1267; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1268; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1269; GFX908: atomicrmw.start: 1270; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1271; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1272; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1273; GFX908-NEXT: [[TMP3:%.*]] = 
bitcast float [[LOADED]] to i32 1274; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1275; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1276; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1277; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1278; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1279; GFX908: atomicrmw.end: 1280; GFX908-NEXT: ret float [[TMP5]] 1281; 1282; GFX90A-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1283; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1284; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1285; GFX90A-NEXT: ret float [[TMP5]] 1286; 1287; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1288; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1289; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1290; GFX940-NEXT: ret float [[RES]] 1291; 1292; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1293; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1294; 
GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1295; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1296; GFX10: atomicrmw.start: 1297; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1298; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1299; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1300; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1301; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1302; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1303; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1304; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1305; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1306; GFX10: atomicrmw.end: 1307; GFX10-NEXT: ret float [[TMP5]] 1308; 1309; GFX11-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1310; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1311; GFX11-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1312; GFX11-NEXT: ret float [[TMP5]] 1313; 1314; GFX12-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 1315; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 1316; GFX12-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 1317; GFX12-NEXT: ret float [[TMP5]] 1318; 1319 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 1320 ret float %res 1321} 1322 1323;--------------------------------------------------------------------- 1324; atomicrmw fadd (no return) 1325;--------------------------------------------------------------------- 1326 1327define void @test_atomicrmw_fadd_noret_f32_global_agent(ptr addrspace(1) %ptr, float %value) { 1328; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1329; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1330; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1331; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1332; GFX803: atomicrmw.start: 1333; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1334; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1335; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1336; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1337; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1338; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1339; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1340; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1341; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1342; GFX803: atomicrmw.end: 1343; GFX803-NEXT: ret void 1344; 1345; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1346; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1347; GFX906-NEXT: 
[[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1348; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1349; GFX906: atomicrmw.start: 1350; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1351; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1352; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1353; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1354; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1355; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1356; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1357; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1358; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1359; GFX906: atomicrmw.end: 1360; GFX906-NEXT: ret void 1361; 1362; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1363; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1364; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1365; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1366; GFX908: atomicrmw.start: 1367; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1368; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1369; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1370; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1371; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1372; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1373; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1374; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1375; GFX908-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1376; GFX908: atomicrmw.end: 1377; GFX908-NEXT: ret void 1378; 1379; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1380; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1381; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1382; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1383; GFX90A: atomicrmw.start: 1384; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1385; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1386; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1387; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1388; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1389; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1390; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1391; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1392; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1393; GFX90A: atomicrmw.end: 1394; GFX90A-NEXT: ret void 1395; 1396; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1397; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1398; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 1399; GFX940-NEXT: ret void 1400; 1401; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1402; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1403; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1404; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1405; GFX10: atomicrmw.start: 1406; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] 
] 1407; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1408; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1409; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1410; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1411; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1412; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1413; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1414; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1415; GFX10: atomicrmw.end: 1416; GFX10-NEXT: ret void 1417; 1418; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1419; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1420; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1421; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 1422; GFX11: atomicrmw.start: 1423; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1424; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1425; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1426; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1427; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 1428; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1429; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1430; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1431; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1432; GFX11: atomicrmw.end: 1433; GFX11-NEXT: ret void 1434; 1435; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( 1436; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1437; GFX12-NEXT: 
[[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 1438; GFX12-NEXT: ret void 1439; 1440 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst 1441 ret void 1442} 1443 1444define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 1445; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1446; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1447; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1448; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1449; GFX803: atomicrmw.start: 1450; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1451; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1452; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1453; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1454; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1455; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1456; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1457; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1458; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1459; GFX803: atomicrmw.end: 1460; GFX803-NEXT: ret void 1461; 1462; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1463; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1464; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1465; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1466; GFX906: atomicrmw.start: 1467; GFX906-NEXT: [[LOADED:%.*]] = phi float [ 
[[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1468; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1469; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1470; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1471; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1472; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1473; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1474; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1475; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1476; GFX906: atomicrmw.end: 1477; GFX906-NEXT: ret void 1478; 1479; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1480; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1481; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1482; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1483; GFX908: atomicrmw.start: 1484; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1485; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1486; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1487; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1488; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1489; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1490; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1491; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1492; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1493; GFX908: atomicrmw.end: 1494; 
GFX908-NEXT: ret void 1495; 1496; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1497; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1498; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1499; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1500; GFX90A: atomicrmw.start: 1501; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1502; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1503; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1504; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1505; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1506; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1507; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1508; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1509; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1510; GFX90A: atomicrmw.end: 1511; GFX90A-NEXT: ret void 1512; 1513; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1514; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1515; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1516; GFX940-NEXT: ret void 1517; 1518; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1519; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1520; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1521; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1522; GFX10: atomicrmw.start: 1523; 
GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1524; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1525; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1526; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1527; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1528; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1529; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1530; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1531; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1532; GFX10: atomicrmw.end: 1533; GFX10-NEXT: ret void 1534; 1535; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1536; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1537; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1538; GFX11-NEXT: ret void 1539; 1540; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( 1541; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1542; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 1543; GFX12-NEXT: ret void 1544; 1545 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1546 ret void 1547} 1548 1549define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1550; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 
1551; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1552; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1553; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1554; GFX803: atomicrmw.start: 1555; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1556; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1557; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1558; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1559; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1560; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1561; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1562; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1563; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1564; GFX803: atomicrmw.end: 1565; GFX803-NEXT: ret void 1566; 1567; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1568; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1569; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1570; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1571; GFX906: atomicrmw.start: 1572; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1573; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1574; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1575; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1576; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1577; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } 
[[TMP4]], 1 1578; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1579; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1580; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1581; GFX906: atomicrmw.end: 1582; GFX906-NEXT: ret void 1583; 1584; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1585; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1586; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1587; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1588; GFX908: atomicrmw.start: 1589; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1590; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1591; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1592; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1593; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1594; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1595; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1596; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1597; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1598; GFX908: atomicrmw.end: 1599; GFX908-NEXT: ret void 1600; 1601; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1602; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1603; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1604; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1605; GFX90A: atomicrmw.start: 1606; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1607; GFX90A-NEXT: 
[[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1608; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1609; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1610; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1611; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1612; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1613; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1614; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1615; GFX90A: atomicrmw.end: 1616; GFX90A-NEXT: ret void 1617; 1618; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1619; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1620; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1621; GFX940-NEXT: ret void 1622; 1623; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1624; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1625; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1626; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1627; GFX10: atomicrmw.start: 1628; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1629; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1630; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1631; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1632; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1633; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 
1634; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1635; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1636; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1637; GFX10: atomicrmw.end: 1638; GFX10-NEXT: ret void 1639; 1640; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1641; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1642; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1643; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 1644; GFX11: atomicrmw.start: 1645; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1646; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1647; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1648; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1649; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1650; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1651; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1652; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1653; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1654; GFX11: atomicrmw.end: 1655; GFX11-NEXT: ret void 1656; 1657; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( 1658; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1659; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 1660; GFX12-NEXT: ret void 1661; 1662 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 1663 ret void 1664} 1665 
; No-return f32 fadd on an addrspace(1) pointer at syncscope("agent"), seq_cst,
; tagged with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory;
; the result %res is unused and the function returns void.
; Per the autogenerated checks, the gfx940, gfx1100 and gfx1200 runs keep the
; atomicrmw fadd as written (only "align 4" is made explicit), while the gfx803,
; gfx906, gfx908, gfx90a and gfx1030 runs expand it into a load + cmpxchg retry
; loop whose cmpxchg carries both metadata nodes.
1666define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 1667; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1668; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1669; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1670; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1671; GFX803: atomicrmw.start: 1672; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1673; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1674; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1675; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1676; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1677; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1678; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1679; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1680; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1681; GFX803: atomicrmw.end: 1682; GFX803-NEXT: ret void 1683; 1684; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1685; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1686; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1687; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1688; GFX906: atomicrmw.start: 1689; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1690; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1691; 
GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1692; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1693; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1694; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1695; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1696; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1697; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1698; GFX906: atomicrmw.end: 1699; GFX906-NEXT: ret void 1700; 1701; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1702; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1703; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1704; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1705; GFX908: atomicrmw.start: 1706; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1707; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1708; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1709; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1710; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1711; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1712; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1713; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1714; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1715; GFX908: atomicrmw.end: 1716; GFX908-NEXT: ret void 1717; 1718; 
GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1719; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1720; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1721; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1722; GFX90A: atomicrmw.start: 1723; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1724; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1725; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1726; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1727; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1728; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1729; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1730; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1731; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1732; GFX90A: atomicrmw.end: 1733; GFX90A-NEXT: ret void 1734; 1735; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1736; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1737; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1738; GFX940-NEXT: ret void 1739; 1740; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1741; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1742; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) 
[[PTR]], align 4 1743; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1744; GFX10: atomicrmw.start: 1745; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1746; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1747; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1748; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1749; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1750; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1751; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1752; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1753; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1754; GFX10: atomicrmw.end: 1755; GFX10-NEXT: ret void 1756; 1757; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1758; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1759; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1760; GFX11-NEXT: ret void 1761; 1762; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 1763; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 1764; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1765; GFX12-NEXT: ret void 1766; 1767 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, 
!amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 1768 ret void 1769} 1770 1771define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz(ptr addrspace(1) %ptr, float %value) #0 { 1772; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1773; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1774; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1775; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 1776; GFX803: atomicrmw.start: 1777; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1778; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1779; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1780; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1781; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1782; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1783; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1784; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1785; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1786; GFX803: atomicrmw.end: 1787; GFX803-NEXT: ret void 1788; 1789; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1790; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1791; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1792; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 1793; GFX906: atomicrmw.start: 1794; GFX906-NEXT: 
[[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1795; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1796; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1797; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1798; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1799; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1800; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1801; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1802; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1803; GFX906: atomicrmw.end: 1804; GFX906-NEXT: ret void 1805; 1806; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1807; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1808; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1809; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 1810; GFX908: atomicrmw.start: 1811; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1812; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1813; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1814; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1815; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1816; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1817; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1818; GFX908-NEXT: [[TMP5]] = bitcast i32 
[[NEWLOADED]] to float 1819; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1820; GFX908: atomicrmw.end: 1821; GFX908-NEXT: ret void 1822; 1823; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1824; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1825; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1826; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 1827; GFX90A: atomicrmw.start: 1828; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1829; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1830; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1831; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1832; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1833; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1834; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1835; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1836; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1837; GFX90A: atomicrmw.end: 1838; GFX90A-NEXT: ret void 1839; 1840; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1841; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1842; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1843; GFX940-NEXT: ret void 1844; 1845; GFX10-LABEL: define void 
@test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1846; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1847; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 1848; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 1849; GFX10: atomicrmw.start: 1850; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 1851; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 1852; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 1853; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 1854; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1855; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 1856; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 1857; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 1858; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 1859; GFX10: atomicrmw.end: 1860; GFX10-NEXT: ret void 1861; 1862; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1863; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1864; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 1865; GFX11-NEXT: ret void 1866; 1867; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( 1868; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 1869; GFX12-NEXT: 
[[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX12-NEXT: ret void
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret void
}

; No-return f32 fadd (the result %res is unused) at agent scope on a global
; pointer, tagged with both !amdgpu.no.fine.grained.memory and
; !amdgpu.no.remote.memory. The function uses attribute group #1, which is
; defined outside this part of the file (presumably the dynamic f32 denormal
; mode the test name refers to; confirm against the attribute definitions).
; Expected result: the atomicrmw is left untouched on gfx940, gfx1100 and
; gfx1200; gfx803, gfx906, gfx908, gfx90a and gfx1030 get a load + cmpxchg
; loop whose cmpxchg carries both metadata nodes.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(ptr addrspace(1) %ptr, float %value) #1 {
; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret void
;
; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret void
;
; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
;
; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX940-NEXT: ret void
;
; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret void
;
; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT: ret void
;
; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] {
; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX12-NEXT: ret void
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
  ret void
}

; No-return f32 fadd at agent scope on a global pointer, tagged only with
; !amdgpu.ignore.denormal.mode (no memory-kind metadata, no function
; attribute group on the define).
; Expected result: the atomicrmw is left untouched on gfx940 and gfx1200
; only; gfx803, gfx906, gfx908, gfx90a, gfx1030 and gfx1100 get a
; load + cmpxchg loop. The expected cmpxchg carries no metadata, i.e.
; !amdgpu.ignore.denormal.mode does not appear on the expanded form.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret void
;
; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret void
;
; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
;
; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
; GFX940-NEXT: ret void
;
; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret void
;
; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret void
;
; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT: ret void
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
  ret void
}

; No-return f32 fadd at agent scope on a global pointer, tagged with
; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
; Expected result: the atomicrmw is left untouched on gfx908, gfx90a, gfx940,
; gfx1100 and gfx1200; gfx803, gfx906 and gfx1030 get a load + cmpxchg loop.
; The expected cmpxchg keeps !amdgpu.no.fine.grained.memory but not
; !amdgpu.ignore.denormal.mode.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret void
;
; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret void
;
; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX90A-NEXT: ret void
;
; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX940-NEXT: ret void
;
; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret void
;
; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX11-NEXT: ret void
;
; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT: ret void
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  ret void
}

; No-return f32 fadd at agent scope on a global pointer, tagged with
; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode.
; Expected result: the atomicrmw is left untouched on gfx940 and gfx1200
; only; gfx803, gfx906, gfx908, gfx90a, gfx1030 and gfx1100 get a
; load + cmpxchg loop. The expected cmpxchg keeps !amdgpu.no.remote.memory
; but not !amdgpu.ignore.denormal.mode.
; The assertions below are autogenerated; regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX803: atomicrmw.start:
; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803: atomicrmw.end:
; GFX803-NEXT: ret void
;
; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX906: atomicrmw.start:
; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906: atomicrmw.end:
; GFX906-NEXT: ret void
;
; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX908: atomicrmw.start:
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908: atomicrmw.end:
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX90A: atomicrmw.start:
; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX90A-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A: atomicrmw.end:
; GFX90A-NEXT: ret void
;
; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX940-NEXT: ret void
;
; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX10: atomicrmw.start:
; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10: atomicrmw.end:
; GFX10-NEXT: ret void
;
; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]]
; GFX11: atomicrmw.start:
; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]]
; GFX11-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; GFX11-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11: atomicrmw.end:
; GFX11-NEXT: ret void
;
; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT: ret void
;
  %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret void
}

define void
@test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2297; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2298; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2299; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2300; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2301; GFX803: atomicrmw.start: 2302; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2303; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2304; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2305; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2306; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2307; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2308; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2309; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2310; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2311; GFX803: atomicrmw.end: 2312; GFX803-NEXT: ret void 2313; 2314; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2315; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2316; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2317; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2318; GFX906: atomicrmw.start: 2319; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2320; 
GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2321; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2322; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2323; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2324; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2325; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2326; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2327; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2328; GFX906: atomicrmw.end: 2329; GFX906-NEXT: ret void 2330; 2331; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2332; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2333; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2334; GFX908-NEXT: ret void 2335; 2336; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2337; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2338; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2339; GFX90A-NEXT: ret void 2340; 2341; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2342; 
GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2343; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2344; GFX940-NEXT: ret void 2345; 2346; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2347; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2348; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2349; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2350; GFX10: atomicrmw.start: 2351; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2352; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2353; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2354; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2355; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2356; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2357; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2358; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2359; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2360; GFX10: atomicrmw.end: 2361; GFX10-NEXT: ret void 2362; 2363; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2364; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2365; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2366; GFX11-NEXT: ret void 2367; 2368; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2369; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2370; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2371; GFX12-NEXT: ret void 2372; 2373 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2374 ret void 2375} 2376
; Same result-unused fadd with all three annotations, in a function carrying attribute group #0
; (presumably a DAZ denormal mode, going by the test name -- confirm against the #0 definition).
; Expected: left as atomicrmw on GFX908, GFX90A, GFX940, GFX11 and GFX12; expanded to a cmpxchg loop on GFX803, GFX906 and GFX10.
2377define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, float %value) #0 { 2378; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2379; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2380; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2381; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2382; GFX803: atomicrmw.start: 2383; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2384; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2385; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2386; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2387; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2388; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2389; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2390; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2391; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2392; GFX803: atomicrmw.end: 2393; GFX803-NEXT: ret void 2394; 2395; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2396; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2397; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2398; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2399; GFX906: atomicrmw.start: 2400; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2401; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2402; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2403; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2404; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2405; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2406; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2407; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2408; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2409; GFX906: atomicrmw.end: 2410; GFX906-NEXT: ret void 2411; 2412; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2413; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], 
float [[VALUE:%.*]]) #[[ATTR1]] { 2414; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2415; GFX908-NEXT: ret void 2416; 2417; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2418; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2419; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2420; GFX90A-NEXT: ret void 2421; 2422; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2423; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2424; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2425; GFX940-NEXT: ret void 2426; 2427; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2428; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2429; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2430; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2431; GFX10: atomicrmw.start: 2432; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2433; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2434; 
GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2435; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2436; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2437; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2438; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2439; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2440; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2441; GFX10: atomicrmw.end: 2442; GFX10-NEXT: ret void 2443; 2444; GFX11-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2445; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2446; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2447; GFX11-NEXT: ret void 2448; 2449; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( 2450; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { 2451; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2452; GFX12-NEXT: ret void 2453; 2454 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2455 ret void 2456} 2457 
; Same result-unused fadd with all three annotations, in a function carrying attribute group #1
; (presumably a dynamic denormal mode, going by the test name -- confirm against the #1 definition).
; Expected: left as atomicrmw on GFX908, GFX90A, GFX940, GFX11 and GFX12; expanded to a cmpxchg loop on GFX803, GFX906 and GFX10.
2458define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, float %value) #1 { 2459; GFX803-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2460; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2461; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2462; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2463; GFX803: atomicrmw.start: 2464; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2465; GFX803-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2466; GFX803-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2467; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2468; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2469; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2470; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2471; GFX803-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2472; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2473; GFX803: atomicrmw.end: 2474; GFX803-NEXT: ret void 2475; 2476; GFX906-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2477; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2478; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2479; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2480; GFX906: atomicrmw.start: 2481; GFX906-NEXT: 
[[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2482; GFX906-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2483; GFX906-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2484; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2485; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2486; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2487; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2488; GFX906-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2489; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2490; GFX906: atomicrmw.end: 2491; GFX906-NEXT: ret void 2492; 2493; GFX908-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2494; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2495; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2496; GFX908-NEXT: ret void 2497; 2498; GFX90A-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2499; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2500; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2501; GFX90A-NEXT: ret void 2502; 2503; GFX940-LABEL: define void 
@test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2504; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2505; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2506; GFX940-NEXT: ret void 2507; 2508; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2509; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2510; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2511; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2512; GFX10: atomicrmw.start: 2513; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2514; GFX10-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE]] 2515; GFX10-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2516; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2517; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2518; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2519; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2520; GFX10-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2521; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2522; GFX10: atomicrmw.end: 2523; GFX10-NEXT: ret void 2524; 2525; GFX11-LABEL: define void 
@test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2526; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2527; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2528; GFX11-NEXT: ret void 2529; 2530; GFX12-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( 2531; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { 2532; GFX12-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 2533; GFX12-NEXT: ret void 2534; 2535 %res = atomicrmw fadd ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2536 ret void 2537} 2538 2539;--------------------------------------------------------------------- 2540; atomicrmw fsub 2541;--------------------------------------------------------------------- 2542
; fsub with no annotations: expanded to a cmpxchg loop on every tested subtarget (a single COMMON check block covers all RUN lines).
2543define float @test_atomicrmw_fsub_f32_global_agent(ptr addrspace(1) %ptr, float %value) { 2544; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent( 2545; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2546; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2547; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2548; COMMON: atomicrmw.start: 2549; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2550; COMMON-NEXT: 
[[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2551; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2552; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2553; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 2554; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2555; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2556; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2557; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2558; COMMON: atomicrmw.end: 2559; COMMON-NEXT: ret float [[RES]] 2560; 2561 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst 2562 ret float %res 2563} 2564
; fsub is still expanded to a cmpxchg loop on every tested subtarget; !amdgpu.no.fine.grained.memory is carried over to the cmpxchg.
2565define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2566; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory( 2567; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2568; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2569; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2570; COMMON: atomicrmw.start: 2571; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2572; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2573; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2574; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2575; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2576; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2577; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2578; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2579; 
COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2580; COMMON: atomicrmw.end: 2581; COMMON-NEXT: ret float [[RES]] 2582; 2583 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 2584 ret float %res 2585} 2586
; fsub is still expanded to a cmpxchg loop on every tested subtarget; !amdgpu.no.remote.memory is carried over to the cmpxchg.
2587define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2588; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_remote_memory( 2589; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2590; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2591; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2592; COMMON: atomicrmw.start: 2593; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2594; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2595; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2596; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2597; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2598; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2599; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2600; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2601; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2602; COMMON: atomicrmw.end: 2603; COMMON-NEXT: ret float [[RES]] 2604; 2605 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 2606 ret float %res 2607} 2608
; fsub is still expanded to a cmpxchg loop on every tested subtarget; both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory are carried over to the cmpxchg.
2609define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2610; COMMON-LABEL: define float 
@test_atomicrmw_fsub_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2611; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2612; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2613; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2614; COMMON: atomicrmw.start: 2615; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 2616; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2617; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2618; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2619; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2620; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2621; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2622; COMMON-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 2623; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2624; COMMON: atomicrmw.end: 2625; COMMON-NEXT: ret float [[RES]] 2626; 2627 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 2628 ret float %res 2629} 2630
; fsub is still expanded to a cmpxchg loop on every tested subtarget; !amdgpu.ignore.denormal.mode does not appear on the resulting cmpxchg.
2631define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 2632; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode( 2633; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2634; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2635; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2636; COMMON: atomicrmw.start: 2637; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], 
[[ATOMICRMW_START]] ] 2638; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2639; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2640; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2641; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 2642; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2643; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2644; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2645; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2646; COMMON: atomicrmw.end: 2647; COMMON-NEXT: ret float [[TMP5]] 2648; 2649 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 2650 ret float %res 2651} 2652
; fsub is still expanded to a cmpxchg loop on every tested subtarget; the cmpxchg keeps !amdgpu.no.fine.grained.memory and carries no !amdgpu.ignore.denormal.mode.
2653define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2654; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 2655; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2656; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2657; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2658; COMMON: atomicrmw.start: 2659; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2660; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2661; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2662; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2663; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2664; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2665; 
COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2666; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2667; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2668; COMMON: atomicrmw.end: 2669; COMMON-NEXT: ret float [[TMP5]] 2670; 2671 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 2672 ret float %res 2673} 2674
; fsub is still expanded to a cmpxchg loop on every tested subtarget; the cmpxchg keeps !amdgpu.no.remote.memory and carries no !amdgpu.ignore.denormal.mode.
2675define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2676; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 2677; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2678; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2679; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2680; COMMON: atomicrmw.start: 2681; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2682; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2683; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2684; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2685; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2686; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2687; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2688; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2689; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2690; COMMON: atomicrmw.end: 2691; COMMON-NEXT: ret float [[TMP5]] 2692; 2693 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, 
!amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2694 ret float %res 2695} 2696
; fsub is still expanded to a cmpxchg loop on every tested subtarget; the cmpxchg keeps !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory and carries no !amdgpu.ignore.denormal.mode.
2697define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2698; COMMON-LABEL: define float @test_atomicrmw_fsub_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 2699; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2700; COMMON-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2701; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] 2702; COMMON: atomicrmw.start: 2703; COMMON-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] 2704; COMMON-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE]] 2705; COMMON-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 2706; COMMON-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 2707; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 2708; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 2709; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 2710; COMMON-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float 2711; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2712; COMMON: atomicrmw.end: 2713; COMMON-NEXT: ret float [[TMP5]] 2714; 2715 %res = atomicrmw fsub ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 2716 ret float %res 2717} 2718 2719;--------------------------------------------------------------------- 2720; atomicrmw fmax 2721;--------------------------------------------------------------------- 2722 2723define float 
@test_atomicrmw_fmax_f32_global_agent(ptr addrspace(1) %ptr, float %value) { 2724; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2725; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2726; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2727; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2728; GFX803: atomicrmw.start: 2729; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2730; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2731; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2732; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2733; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2734; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2735; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2736; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2737; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2738; GFX803: atomicrmw.end: 2739; GFX803-NEXT: ret float [[TMP6]] 2740; 2741; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2742; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2743; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2744; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2745; GFX906: atomicrmw.start: 2746; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2747; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2748; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2749; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2750; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 
[[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2751; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2752; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2753; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2754; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2755; GFX906: atomicrmw.end: 2756; GFX906-NEXT: ret float [[TMP6]] 2757; 2758; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2759; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2760; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2761; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2762; GFX908: atomicrmw.start: 2763; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2764; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2765; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2766; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2767; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2768; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2769; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2770; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2771; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2772; GFX908: atomicrmw.end: 2773; GFX908-NEXT: ret float [[TMP6]] 2774; 2775; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2776; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2777; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2778; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2779; GFX90A: atomicrmw.start: 2780; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] 
], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2781; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2782; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2783; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2784; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2785; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2786; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2787; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2788; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2789; GFX90A: atomicrmw.end: 2790; GFX90A-NEXT: ret float [[TMP6]] 2791; 2792; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2793; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2794; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2795; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2796; GFX940: atomicrmw.start: 2797; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2798; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2799; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2800; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2801; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2802; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2803; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2804; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2805; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2806; GFX940: atomicrmw.end: 2807; GFX940-NEXT: ret float [[TMP6]] 2808; 2809; GFX10-LABEL: define float 
@test_atomicrmw_fmax_f32_global_agent( 2810; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2811; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2812; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 2813; GFX10: atomicrmw.start: 2814; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2815; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2816; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2817; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2818; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2819; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2820; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2821; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2822; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2823; GFX10: atomicrmw.end: 2824; GFX10-NEXT: ret float [[TMP6]] 2825; 2826; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2827; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2828; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2829; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 2830; GFX11: atomicrmw.start: 2831; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2832; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2833; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2834; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2835; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 2836; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2837; 
GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2838; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2839; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2840; GFX11: atomicrmw.end: 2841; GFX11-NEXT: ret float [[TMP6]] 2842; 2843; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( 2844; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2845; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 2846; GFX12-NEXT: ret float [[RES]] 2847; 2848 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst 2849 ret float %res 2850} 2851 2852define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 2853; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2854; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2855; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2856; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2857; GFX803: atomicrmw.start: 2858; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2859; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2860; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2861; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2862; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2863; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2864; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2865; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2866; GFX803-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2867; GFX803: atomicrmw.end: 2868; GFX803-NEXT: ret float [[TMP6]] 2869; 2870; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2871; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2872; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2873; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2874; GFX906: atomicrmw.start: 2875; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2876; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2877; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2878; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2879; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2880; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2881; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2882; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2883; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2884; GFX906: atomicrmw.end: 2885; GFX906-NEXT: ret float [[TMP6]] 2886; 2887; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2888; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2889; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2890; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2891; GFX908: atomicrmw.start: 2892; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2893; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2894; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2895; 
GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2896; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2897; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2898; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2899; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2900; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2901; GFX908: atomicrmw.end: 2902; GFX908-NEXT: ret float [[TMP6]] 2903; 2904; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2905; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2906; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2907; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 2908; GFX90A: atomicrmw.start: 2909; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2910; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2911; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2912; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2913; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2914; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2915; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2916; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2917; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2918; GFX90A: atomicrmw.end: 2919; GFX90A-NEXT: ret float [[TMP6]] 2920; 2921; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2922; GFX940-SAME: ptr 
addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2923; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2924; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 2925; GFX940: atomicrmw.start: 2926; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2927; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2928; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2929; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2930; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2931; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2932; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2933; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2934; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2935; GFX940: atomicrmw.end: 2936; GFX940-NEXT: ret float [[TMP6]] 2937; 2938; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2939; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2940; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2941; GFX10-NEXT: ret float [[RES]] 2942; 2943; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2944; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2945; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2946; GFX11-NEXT: ret float [[RES]] 2947; 2948; GFX12-LABEL: define float 
@test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( 2949; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2950; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 2951; GFX12-NEXT: ret float [[RES]] 2952; 2953 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 2954 ret float %res 2955} 2956 2957define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 2958; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 2959; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2960; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2961; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 2962; GFX803: atomicrmw.start: 2963; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2964; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2965; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2966; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2967; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2968; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2969; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2970; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2971; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2972; GFX803: atomicrmw.end: 2973; GFX803-NEXT: ret float [[TMP6]] 2974; 2975; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 2976; GFX906-SAME: ptr 
addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2977; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2978; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 2979; GFX906: atomicrmw.start: 2980; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2981; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2982; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 2983; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 2984; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 2985; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 2986; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 2987; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 2988; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 2989; GFX906: atomicrmw.end: 2990; GFX906-NEXT: ret float [[TMP6]] 2991; 2992; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 2993; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 2994; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 2995; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 2996; GFX908: atomicrmw.start: 2997; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 2998; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 2999; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3000; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3001; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3002; GFX908-NEXT: 
[[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3003; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3004; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3005; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3006; GFX908: atomicrmw.end: 3007; GFX908-NEXT: ret float [[TMP6]] 3008; 3009; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 3010; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3011; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3012; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3013; GFX90A: atomicrmw.start: 3014; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3015; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3016; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3017; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3018; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3019; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3020; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3021; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3022; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3023; GFX90A: atomicrmw.end: 3024; GFX90A-NEXT: ret float [[TMP6]] 3025; 3026; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 3027; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3028; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3029; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3030; GFX940: atomicrmw.start: 3031; GFX940-NEXT: [[LOADED:%.*]] = phi float [ 
[[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3032; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3033; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3034; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3035; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3036; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3037; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3038; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3039; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3040; GFX940: atomicrmw.end: 3041; GFX940-NEXT: ret float [[TMP6]] 3042; 3043; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 3044; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3045; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3046; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 3047; GFX10: atomicrmw.start: 3048; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3049; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3050; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3051; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3052; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3053; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3054; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3055; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3056; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 
3057; GFX10: atomicrmw.end: 3058; GFX10-NEXT: ret float [[RES]] 3059; 3060; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 3061; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3062; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3063; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 3064; GFX11: atomicrmw.start: 3065; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3066; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3067; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3068; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3069; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3070; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3071; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3072; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3073; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3074; GFX11: atomicrmw.end: 3075; GFX11-NEXT: ret float [[RES]] 3076; 3077; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( 3078; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3079; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3080; GFX12-NEXT: ret float [[RES]] 3081; 3082 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 3083 ret float %res 3084} 3085 3086define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3087; GFX803-LABEL: define 
float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3088; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3089; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3090; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3091; GFX803: atomicrmw.start: 3092; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3093; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3094; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3095; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3096; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3097; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3098; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3099; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3100; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3101; GFX803: atomicrmw.end: 3102; GFX803-NEXT: ret float [[TMP6]] 3103; 3104; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3105; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3106; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3107; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3108; GFX906: atomicrmw.start: 3109; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3110; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3111; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3112; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 
3113; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3114; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3115; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3116; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3117; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3118; GFX906: atomicrmw.end: 3119; GFX906-NEXT: ret float [[TMP6]] 3120; 3121; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3122; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3123; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3124; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3125; GFX908: atomicrmw.start: 3126; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3127; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3128; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3129; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3130; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3131; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3132; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3133; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3134; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3135; GFX908: atomicrmw.end: 3136; GFX908-NEXT: ret float [[TMP6]] 3137; 3138; GFX90A-LABEL: define float 
@test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3139; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3140; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3141; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3142; GFX90A: atomicrmw.start: 3143; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3144; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3145; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3146; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3147; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3148; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3149; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3150; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3151; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3152; GFX90A: atomicrmw.end: 3153; GFX90A-NEXT: ret float [[TMP6]] 3154; 3155; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3156; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3157; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3158; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3159; GFX940: atomicrmw.start: 3160; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3161; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3162; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3163; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3164; 
GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3165; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3166; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3167; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3168; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3169; GFX940: atomicrmw.end: 3170; GFX940-NEXT: ret float [[TMP6]] 3171; 3172; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3173; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3174; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3175; GFX10-NEXT: ret float [[RES]] 3176; 3177; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3178; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3179; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3180; GFX11-NEXT: ret float [[RES]] 3181; 3182; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3183; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3184; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3185; GFX12-NEXT: ret float [[RES]] 3186; 3187 %res = atomicrmw fmax ptr addrspace(1) 
%ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 3188 ret float %res 3189} 3190

; fmax with only !amdgpu.ignore.denormal.mode attached. Per the checks below,
; gfx1200 keeps the atomicrmw as-is; every other tested subtarget gets a
; cmpxchg loop built around llvm.maxnum.f32.
define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX803-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP6]]
;
; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX906-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP6]]
;
; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX908-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP6]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX90A-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP6]]
;
; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX940-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX940:       atomicrmw.start:
; GFX940-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX940-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX940-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX940-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX940-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX940-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX940-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940:       atomicrmw.end:
; GFX940-NEXT:    ret float [[TMP6]]
;
; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX10:       atomicrmw.start:
; GFX10-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX10-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX10-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX10-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX10-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX10-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10:       atomicrmw.end:
; GFX10-NEXT:    ret float [[TMP6]]
;
; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX11-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX11:       atomicrmw.start:
; GFX11-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX11-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX11-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX11-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; GFX11-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX11-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX11-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11:       atomicrmw.end:
; GFX11-NEXT:    ret float [[TMP6]]
;
; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

; fmax with !amdgpu.ignore.denormal.mode and !amdgpu.no.fine.grained.memory.
; Per the checks below, gfx1030, gfx1100 and gfx1200 keep the atomicrmw with
; both annotations; gfx803 through gfx940 get a cmpxchg loop whose cmpxchg
; carries only !amdgpu.no.fine.grained.memory.
define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX803-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP6]]
;
; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX906-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP6]]
;
; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX908-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP6]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX90A-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP6]]
;
; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX940-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX940:       atomicrmw.start:
; GFX940-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX940-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX940-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX940-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; GFX940-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX940-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX940-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940:       atomicrmw.end:
; GFX940-NEXT:    ret float [[TMP6]]
;
; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT:    ret float [[RES]]
;
; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX11-NEXT:    ret float [[RES]]
;
; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

; fmax with !amdgpu.ignore.denormal.mode and !amdgpu.no.remote.memory.
; Per the checks below, only gfx1200 keeps the atomicrmw with both
; annotations; the other tested subtargets get a cmpxchg loop whose cmpxchg
; carries only !amdgpu.no.remote.memory.
define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX803-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP6]]
;
; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX906-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP6]]
;
; GFX908-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX908-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP6]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX90A-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP6]]
;
; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX940-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX940:       atomicrmw.start:
; GFX940-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX940-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX940-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX940-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX940-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX940-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX940-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940:       atomicrmw.end:
; GFX940-NEXT:    ret float [[TMP6]]
;
; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX10-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX10:       atomicrmw.start:
; GFX10-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
; GFX10-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX10-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX10-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX10-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX10-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX10-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX10-NEXT:    [[RES]] = bitcast i32 [[NEWLOADED]] to float
; GFX10-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX10:       atomicrmw.end:
; GFX10-NEXT:    ret float [[RES]]
;
; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX11-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX11:       atomicrmw.start:
; GFX11-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ]
; GFX11-NEXT:    [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]])
; GFX11-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX11-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX11-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX11-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX11-NEXT:    [[RES]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11:       atomicrmw.end:
; GFX11-NEXT:    ret float [[RES]]
;
; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

3554define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3555; GFX803-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3556; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3557; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3558; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3559; GFX803: atomicrmw.start: 3560; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3561; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3562; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3563; GFX803-NEXT: [[TMP4:%.*]] = 
bitcast float [[LOADED]] to i32 3564; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3565; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3566; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3567; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3568; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3569; GFX803: atomicrmw.end: 3570; GFX803-NEXT: ret float [[TMP6]] 3571; 3572; GFX906-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3573; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3574; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3575; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3576; GFX906: atomicrmw.start: 3577; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3578; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3579; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3580; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3581; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3582; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3583; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3584; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3585; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3586; GFX906: atomicrmw.end: 3587; GFX906-NEXT: ret float [[TMP6]] 3588; 3589; GFX908-LABEL: define 
float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3590; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3591; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3592; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3593; GFX908: atomicrmw.start: 3594; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3595; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3596; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3597; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3598; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3599; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3600; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3601; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3602; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3603; GFX908: atomicrmw.end: 3604; GFX908-NEXT: ret float [[TMP6]] 3605; 3606; GFX90A-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3607; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3608; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3609; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3610; GFX90A: atomicrmw.start: 3611; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3612; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3613; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3614; 
GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3615; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3616; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3617; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3618; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3619; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3620; GFX90A: atomicrmw.end: 3621; GFX90A-NEXT: ret float [[TMP6]] 3622; 3623; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3624; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3625; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3626; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3627; GFX940: atomicrmw.start: 3628; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3629; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) 3630; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3631; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3632; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 3633; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3634; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3635; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3636; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3637; GFX940: atomicrmw.end: 3638; GFX940-NEXT: ret float [[TMP6]] 3639; 
3640; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3641; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3642; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3643; GFX10-NEXT: ret float [[RES]] 3644; 3645; GFX11-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3646; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3647; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3648; GFX11-NEXT: ret float [[RES]] 3649; 3650; GFX12-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 3651; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3652; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 3653; GFX12-NEXT: ret float [[RES]] 3654; 3655 %res = atomicrmw fmax ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 3656 ret float %res 3657} 3658 3659;--------------------------------------------------------------------- 3660; atomicrmw fmin 3661;--------------------------------------------------------------------- 3662 3663define float 
@test_atomicrmw_fmin_f32_global_agent(ptr addrspace(1) %ptr, float %value) { 3664; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3665; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3666; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3667; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3668; GFX803: atomicrmw.start: 3669; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3670; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3671; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3672; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3673; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3674; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3675; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3676; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3677; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3678; GFX803: atomicrmw.end: 3679; GFX803-NEXT: ret float [[TMP6]] 3680; 3681; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3682; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3683; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3684; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3685; GFX906: atomicrmw.start: 3686; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3687; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3688; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3689; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3690; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 
[[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3691; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3692; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3693; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3694; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3695; GFX906: atomicrmw.end: 3696; GFX906-NEXT: ret float [[TMP6]] 3697; 3698; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3699; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3700; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3701; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3702; GFX908: atomicrmw.start: 3703; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3704; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3705; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3706; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3707; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3708; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3709; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3710; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3711; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3712; GFX908: atomicrmw.end: 3713; GFX908-NEXT: ret float [[TMP6]] 3714; 3715; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3716; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3717; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3718; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3719; GFX90A: atomicrmw.start: 3720; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] 
], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3721; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3722; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3723; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3724; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3725; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3726; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3727; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3728; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3729; GFX90A: atomicrmw.end: 3730; GFX90A-NEXT: ret float [[TMP6]] 3731; 3732; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3733; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3734; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3735; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3736; GFX940: atomicrmw.start: 3737; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3738; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3739; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3740; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3741; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3742; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3743; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3744; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3745; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3746; GFX940: atomicrmw.end: 3747; GFX940-NEXT: ret float [[TMP6]] 3748; 3749; GFX10-LABEL: define float 
@test_atomicrmw_fmin_f32_global_agent( 3750; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3751; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3752; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 3753; GFX10: atomicrmw.start: 3754; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3755; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3756; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3757; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3758; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3759; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3760; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3761; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3762; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3763; GFX10: atomicrmw.end: 3764; GFX10-NEXT: ret float [[TMP6]] 3765; 3766; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3767; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3768; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3769; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 3770; GFX11: atomicrmw.start: 3771; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3772; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3773; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3774; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3775; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 3776; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3777; 
GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3778; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3779; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3780; GFX11: atomicrmw.end: 3781; GFX11-NEXT: ret float [[TMP6]] 3782; 3783; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( 3784; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3785; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 3786; GFX12-NEXT: ret float [[RES]] 3787; 3788 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst 3789 ret float %res 3790} 3791 3792define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 3793; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3794; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3795; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3796; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3797; GFX803: atomicrmw.start: 3798; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3799; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3800; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3801; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3802; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3803; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3804; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3805; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3806; GFX803-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3807; GFX803: atomicrmw.end: 3808; GFX803-NEXT: ret float [[TMP6]] 3809; 3810; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3811; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3812; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3813; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3814; GFX906: atomicrmw.start: 3815; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3816; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3817; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3818; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3819; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3820; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3821; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3822; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3823; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3824; GFX906: atomicrmw.end: 3825; GFX906-NEXT: ret float [[TMP6]] 3826; 3827; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3828; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3829; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3830; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3831; GFX908: atomicrmw.start: 3832; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3833; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3834; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3835; 
GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3836; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3837; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3838; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3839; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3840; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3841; GFX908: atomicrmw.end: 3842; GFX908-NEXT: ret float [[TMP6]] 3843; 3844; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3845; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3846; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3847; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3848; GFX90A: atomicrmw.start: 3849; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3850; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3851; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3852; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3853; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3854; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3855; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3856; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3857; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3858; GFX90A: atomicrmw.end: 3859; GFX90A-NEXT: ret float [[TMP6]] 3860; 3861; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3862; GFX940-SAME: ptr 
addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3863; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3864; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3865; GFX940: atomicrmw.start: 3866; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3867; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3868; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3869; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3870; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3871; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3872; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3873; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3874; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3875; GFX940: atomicrmw.end: 3876; GFX940-NEXT: ret float [[TMP6]] 3877; 3878; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3879; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3880; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3881; GFX10-NEXT: ret float [[RES]] 3882; 3883; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3884; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3885; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3886; GFX11-NEXT: ret float [[RES]] 3887; 3888; GFX12-LABEL: define float 
@test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( 3889; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3890; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 3891; GFX12-NEXT: ret float [[RES]] 3892; 3893 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 3894 ret float %res 3895} 3896 3897define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 3898; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3899; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3900; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3901; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 3902; GFX803: atomicrmw.start: 3903; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3904; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3905; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3906; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3907; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3908; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3909; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3910; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3911; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3912; GFX803: atomicrmw.end: 3913; GFX803-NEXT: ret float [[TMP6]] 3914; 3915; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3916; GFX906-SAME: ptr 
addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3917; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3918; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 3919; GFX906: atomicrmw.start: 3920; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3921; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3922; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3923; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3924; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3925; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3926; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3927; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3928; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3929; GFX906: atomicrmw.end: 3930; GFX906-NEXT: ret float [[TMP6]] 3931; 3932; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3933; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3934; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3935; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 3936; GFX908: atomicrmw.start: 3937; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3938; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3939; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3940; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3941; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3942; GFX908-NEXT: 
[[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3943; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3944; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3945; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3946; GFX908: atomicrmw.end: 3947; GFX908-NEXT: ret float [[TMP6]] 3948; 3949; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3950; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3951; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3952; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 3953; GFX90A: atomicrmw.start: 3954; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3955; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3956; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3957; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3958; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3959; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3960; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3961; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3962; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3963; GFX90A: atomicrmw.end: 3964; GFX90A-NEXT: ret float [[TMP6]] 3965; 3966; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3967; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3968; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3969; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 3970; GFX940: atomicrmw.start: 3971; GFX940-NEXT: [[LOADED:%.*]] = phi float [ 
[[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 3972; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3973; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3974; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3975; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3976; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3977; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3978; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 3979; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 3980; GFX940: atomicrmw.end: 3981; GFX940-NEXT: ret float [[TMP6]] 3982; 3983; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 3984; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 3985; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 3986; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 3987; GFX10: atomicrmw.start: 3988; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 3989; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 3990; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 3991; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 3992; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 3993; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 3994; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 3995; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 3996; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 
3997; GFX10: atomicrmw.end: 3998; GFX10-NEXT: ret float [[RES]] 3999; 4000; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 4001; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4002; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4003; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 4004; GFX11: atomicrmw.start: 4005; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 4006; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4007; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4008; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4009; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4010; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4011; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4012; GFX11-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 4013; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4014; GFX11: atomicrmw.end: 4015; GFX11-NEXT: ret float [[RES]] 4016; 4017; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( 4018; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4019; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4020; GFX12-NEXT: ret float [[RES]] 4021; 4022 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 4023 ret float %res 4024} 4025 4026define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 4027; GFX803-LABEL: define 
float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4028; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4029; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4030; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 4031; GFX803: atomicrmw.start: 4032; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4033; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4034; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4035; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4036; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4037; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4038; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4039; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4040; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4041; GFX803: atomicrmw.end: 4042; GFX803-NEXT: ret float [[TMP6]] 4043; 4044; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4045; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4046; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4047; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 4048; GFX906: atomicrmw.start: 4049; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4050; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4051; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4052; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 
4053; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4054; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4055; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4056; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4057; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4058; GFX906: atomicrmw.end: 4059; GFX906-NEXT: ret float [[TMP6]] 4060; 4061; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4062; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4063; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4064; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 4065; GFX908: atomicrmw.start: 4066; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4067; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4068; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4069; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4070; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4071; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4072; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4073; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4074; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4075; GFX908: atomicrmw.end: 4076; GFX908-NEXT: ret float [[TMP6]] 4077; 4078; GFX90A-LABEL: define float 
@test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4079; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4080; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4081; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 4082; GFX90A: atomicrmw.start: 4083; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4084; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4085; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4086; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4087; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4088; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4089; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4090; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4091; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4092; GFX90A: atomicrmw.end: 4093; GFX90A-NEXT: ret float [[TMP6]] 4094; 4095; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4096; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4097; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4098; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 4099; GFX940: atomicrmw.start: 4100; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4101; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4102; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4103; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4104; 
GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4105; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4106; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4107; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4108; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4109; GFX940: atomicrmw.end: 4110; GFX940-NEXT: ret float [[TMP6]] 4111; 4112; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4113; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4114; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4115; GFX10-NEXT: ret float [[RES]] 4116; 4117; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4118; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4119; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4120; GFX11-NEXT: ret float [[RES]] 4121; 4122; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( 4123; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4124; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] 4125; GFX12-NEXT: ret float [[RES]] 4126; 4127 %res = atomicrmw fmin ptr addrspace(1) 
%ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 4128 ret float %res 4129} 4130 4131define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, float %value) { 4132; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4133; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4134; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4135; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 4136; GFX803: atomicrmw.start: 4137; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4138; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4139; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4140; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4141; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4142; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4143; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4144; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4145; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4146; GFX803: atomicrmw.end: 4147; GFX803-NEXT: ret float [[TMP6]] 4148; 4149; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4150; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4151; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4152; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 4153; GFX906: atomicrmw.start: 4154; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4155; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float 
[[LOADED]], float [[VALUE]]) 4156; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4157; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4158; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4159; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4160; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4161; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4162; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4163; GFX906: atomicrmw.end: 4164; GFX906-NEXT: ret float [[TMP6]] 4165; 4166; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4167; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4168; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4169; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 4170; GFX908: atomicrmw.start: 4171; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4172; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4173; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4174; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4175; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4176; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4177; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4178; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4179; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4180; GFX908: atomicrmw.end: 4181; GFX908-NEXT: ret float [[TMP6]] 4182; 4183; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4184; 
GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4185; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4186; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 4187; GFX90A: atomicrmw.start: 4188; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4189; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4190; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4191; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4192; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4193; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4194; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4195; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4196; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4197; GFX90A: atomicrmw.end: 4198; GFX90A-NEXT: ret float [[TMP6]] 4199; 4200; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4201; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4202; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4203; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 4204; GFX940: atomicrmw.start: 4205; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4206; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4207; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4208; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4209; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4210; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 
4211; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4212; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4213; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4214; GFX940: atomicrmw.end: 4215; GFX940-NEXT: ret float [[TMP6]] 4216; 4217; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4218; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4219; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4220; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 4221; GFX10: atomicrmw.start: 4222; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4223; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4224; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4225; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4226; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4227; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4228; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4229; GFX10-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4230; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4231; GFX10: atomicrmw.end: 4232; GFX10-NEXT: ret float [[TMP6]] 4233; 4234; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4235; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4236; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4237; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 4238; GFX11: atomicrmw.start: 4239; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4240; GFX11-NEXT: [[TMP2:%.*]] = call float 
@llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4241; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4242; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4243; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 4244; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4245; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4246; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4247; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4248; GFX11: atomicrmw.end: 4249; GFX11-NEXT: ret float [[TMP6]] 4250; 4251; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( 4252; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4253; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] 4254; GFX12-NEXT: ret float [[RES]] 4255; 4256 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode !0 4257 ret float %res 4258} 4259 4260define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) { 4261; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4262; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4263; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4264; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 4265; GFX803: atomicrmw.start: 4266; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4267; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4268; 
GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4269; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4270; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 4271; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4272; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4273; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4274; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4275; GFX803: atomicrmw.end: 4276; GFX803-NEXT: ret float [[TMP6]] 4277; 4278; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4279; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4280; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4281; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 4282; GFX906: atomicrmw.start: 4283; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4284; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4285; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4286; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4287; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 4288; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4289; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4290; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4291; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4292; GFX906: atomicrmw.end: 4293; GFX906-NEXT: ret float [[TMP6]] 4294; 4295; GFX908-LABEL: define float 
@test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4296; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4297; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4298; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 4299; GFX908: atomicrmw.start: 4300; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4301; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4302; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4303; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4304; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 4305; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4306; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4307; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4308; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4309; GFX908: atomicrmw.end: 4310; GFX908-NEXT: ret float [[TMP6]] 4311; 4312; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4313; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4314; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4315; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 4316; GFX90A: atomicrmw.start: 4317; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4318; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4319; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4320; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4321; GFX90A-NEXT: [[TMP5:%.*]] = 
cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 4322; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4323; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4324; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4325; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4326; GFX90A: atomicrmw.end: 4327; GFX90A-NEXT: ret float [[TMP6]] 4328; 4329; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4330; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4331; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4332; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 4333; GFX940: atomicrmw.start: 4334; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4335; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4336; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4337; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4338; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] 4339; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4340; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4341; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4342; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4343; GFX940: atomicrmw.end: 4344; GFX940-NEXT: ret float [[TMP6]] 4345; 4346; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4347; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float 
[[VALUE:%.*]]) #[[ATTR0]] { 4348; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 4349; GFX10-NEXT: ret float [[RES]] 4350; 4351; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4352; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4353; GFX11-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 4354; GFX11-NEXT: ret float [[RES]] 4355; 4356; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( 4357; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4358; GFX12-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] 4359; GFX12-NEXT: ret float [[RES]] 4360; 4361 %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 4362 ret float %res 4363} 4364 4365define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) { 4366; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4367; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4368; GFX803-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4369; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] 4370; GFX803: atomicrmw.start: 4371; GFX803-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ 
[[TMP6:%.*]], [[ATOMICRMW_START]] ] 4372; GFX803-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4373; GFX803-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4374; GFX803-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4375; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4376; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4377; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4378; GFX803-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4379; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4380; GFX803: atomicrmw.end: 4381; GFX803-NEXT: ret float [[TMP6]] 4382; 4383; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4384; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4385; GFX906-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4386; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] 4387; GFX906: atomicrmw.start: 4388; GFX906-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4389; GFX906-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4390; GFX906-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4391; GFX906-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4392; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4393; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4394; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4395; GFX906-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4396; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label 
[[ATOMICRMW_START]] 4397; GFX906: atomicrmw.end: 4398; GFX906-NEXT: ret float [[TMP6]] 4399; 4400; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4401; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4402; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4403; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] 4404; GFX908: atomicrmw.start: 4405; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4406; GFX908-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4407; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4408; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4409; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4410; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4411; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4412; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4413; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4414; GFX908: atomicrmw.end: 4415; GFX908-NEXT: ret float [[TMP6]] 4416; 4417; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4418; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4419; GFX90A-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4420; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] 4421; GFX90A: atomicrmw.start: 4422; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4423; GFX90A-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4424; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] 
to i32 4425; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4426; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4427; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4428; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4429; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4430; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4431; GFX90A: atomicrmw.end: 4432; GFX90A-NEXT: ret float [[TMP6]] 4433; 4434; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4435; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4436; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4437; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] 4438; GFX940: atomicrmw.start: 4439; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] 4440; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4441; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4442; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4443; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4444; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4445; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4446; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float 4447; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4448; GFX940: atomicrmw.end: 4449; GFX940-NEXT: ret float [[TMP6]] 4450; 4451; GFX10-LABEL: define float 
@test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4452; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4453; GFX10-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4454; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] 4455; GFX10: atomicrmw.start: 4456; GFX10-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 4457; GFX10-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4458; GFX10-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4459; GFX10-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4460; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] 4461; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 4462; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 4463; GFX10-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float 4464; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] 4465; GFX10: atomicrmw.end: 4466; GFX10-NEXT: ret float [[RES]] 4467; 4468; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( 4469; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { 4470; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 4471; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] 4472; GFX11: atomicrmw.start: 4473; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] 4474; GFX11-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) 4475; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 4476; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 4477; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], 
i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; GFX11-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX11-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX11-NEXT:    [[RES]] = bitcast i32 [[NEWLOADED]] to float
; GFX11-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX11:       atomicrmw.end:
; GFX11-NEXT:    ret float [[RES]]
;
; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

; fmin on global memory at agent scope with !amdgpu.ignore.denormal.mode,
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory all attached.
; Per the assertions below, GFX10, GFX11 and GFX12 keep the atomicrmw as-is
; with all three annotations, while GFX803, GFX906, GFX908, GFX90A and GFX940
; get a cmpxchg loop whose cmpxchg carries only !amdgpu.no.fine.grained.memory
; and !amdgpu.no.remote.memory.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_test_checks.py instead of editing
; them by hand.
define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
; GFX803-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX803-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX803-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX803:       atomicrmw.start:
; GFX803-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX803-NEXT:    [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
; GFX803-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX803-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX803-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX803-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX803-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX803-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX803-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX803:       atomicrmw.end:
; GFX803-NEXT:    ret float [[TMP6]]
;
; GFX906-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX906-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX906-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX906:       atomicrmw.start:
; GFX906-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX906-NEXT:    [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
; GFX906-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX906-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX906-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX906-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX906-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX906-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX906-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX906:       atomicrmw.end:
; GFX906-NEXT:    ret float [[TMP6]]
;
; GFX908-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX908-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX908-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX908:       atomicrmw.start:
; GFX908-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX908-NEXT:    [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
; GFX908-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX908-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX908-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX908-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX908-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX908-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX908-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX908:       atomicrmw.end:
; GFX908-NEXT:    ret float [[TMP6]]
;
; GFX90A-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX90A-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX90A-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX90A:       atomicrmw.start:
; GFX90A-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX90A-NEXT:    [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
; GFX90A-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX90A-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX90A-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX90A-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX90A-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX90A-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX90A-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX90A:       atomicrmw.end:
; GFX90A-NEXT:    ret float [[TMP6]]
;
; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX940-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4
; GFX940-NEXT:    br label [[ATOMICRMW_START:%.*]]
; GFX940:       atomicrmw.start:
; GFX940-NEXT:    [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; GFX940-NEXT:    [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]])
; GFX940-NEXT:    [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
; GFX940-NEXT:    [[TMP4:%.*]] = bitcast float [[LOADED]] to i32
; GFX940-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; GFX940-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; GFX940-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; GFX940-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to float
; GFX940-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GFX940:       atomicrmw.end:
; GFX940-NEXT:    ret float [[TMP6]]
;
; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX10-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX10-NEXT:    ret float [[RES]]
;
; GFX11-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX11-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX11-NEXT:    ret float [[RES]]
;
; GFX12-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
; GFX12-NEXT:    [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
; GFX12-NEXT:    ret float [[RES]]
;
  %res = atomicrmw fmin ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %res
}

attributes #0 = { "denormal-fp-mode-f32"="preserve-sign,preserve-sign" }
attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" }

!0 = !{}
;.
; GFX803: [[META0]] = !{}
;.
; GFX906: [[META0]] = !{}
;.
; GFX908: [[META0]] = !{}
;.
; GFX90A: [[META0]] = !{}
;.
; GFX940: [[META0]] = !{}
;.
; GFX10: [[META0]] = !{}
;.
; GFX11: [[META0]] = !{}
;.
; GFX12: [[META0]] = !{}
;.