; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GFX12 %s

; Checks the code llc emits with -global-isel for calls to the
; llvm.amdgcn.image.atomic.<op>.1d.i32.i32 intrinsics on several AMDGPU
; subtargets (tahiti, fiji, gfx900, gfx90a, gfx1010, gfx1100, gfx1200).
; Every function below is an amdgpu_ps shader that passes %data, %s and the
; inreg <8 x i32> %rsrc to one intrinsic and returns the i32 result bitcast
; to float. gfx1010 and gfx1100 share one set of expected lines.
; NOTE(review): the two trailing "i32 0" intrinsic operands are presumably the
; texfailctrl and cachepolicy immediates -- confirm against the intrinsic
; definition.
; The assertion lines are regenerated by update_llc_test_checks.py; do not
; edit them by hand.

; swap: calls llvm.amdgcn.image.atomic.swap.1d.i32.i32; all subtargets are
; expected to emit image_atomic_swap.
define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_swap_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_swap_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_swap_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_swap_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_swap_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_swap_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; add: calls llvm.amdgcn.image.atomic.add.1d.i32.i32; the gfx1200 lines expect
; the mnemonic image_atomic_add_uint, the others image_atomic_add.
define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; sub: calls llvm.amdgcn.image.atomic.sub.1d.i32.i32; the gfx1200 lines expect
; the mnemonic image_atomic_sub_uint, the others image_atomic_sub.
define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_sub_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_sub_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_sub_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_sub_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_sub_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_sub_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; smin: calls llvm.amdgcn.image.atomic.smin.1d.i32.i32; the gfx1200 lines
; expect the mnemonic image_atomic_min_int, the others image_atomic_smin.
define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_smin_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_smin_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_smin_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smin_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smin_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_smin_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; umin: calls llvm.amdgcn.image.atomic.umin.1d.i32.i32; the gfx1200 lines
; expect the mnemonic image_atomic_min_uint, the others image_atomic_umin.
define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_umin_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_umin_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_umin_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umin_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umin_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_umin_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; smax: amdgpu_ps shader that passes %data, %s and the inreg %rsrc to
; llvm.amdgcn.image.atomic.smax.1d.i32.i32 and returns the i32 result bitcast
; to float. The gfx1200 lines expect the mnemonic image_atomic_max_int, the
; others image_atomic_smax.
define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_smax_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_smax_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_smax_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smax_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smax_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_smax_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; umax: calls llvm.amdgcn.image.atomic.umax.1d.i32.i32 and returns the i32
; result bitcast to float. The gfx1200 lines expect the mnemonic
; image_atomic_max_uint, the others image_atomic_umax.
define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_umax_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_umax_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_umax_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umax_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umax_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_umax_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; and: calls llvm.amdgcn.image.atomic.and.1d.i32.i32 and returns the i32
; result bitcast to float; all subtargets are expected to emit
; image_atomic_and.
define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_and_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_and_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_and_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_and_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_and_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_and_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; or: calls llvm.amdgcn.image.atomic.or.1d.i32.i32 and returns the i32 result
; bitcast to float; all subtargets are expected to emit image_atomic_or.
define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_or_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_or_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_or_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_or_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_or_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_or_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; xor: calls llvm.amdgcn.image.atomic.xor.1d.i32.i32 and returns the i32
; result bitcast to float; all subtargets are expected to emit
; image_atomic_xor.
define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_xor_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_xor_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: atomic_xor_i32_1d:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: s_mov_b32 s0, s2
; GFX900-NEXT: s_mov_b32 s1, s3
; GFX900-NEXT: s_mov_b32 s2, s4
; GFX900-NEXT: s_mov_b32 s3, s5
; GFX900-NEXT: s_mov_b32 s4, s6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ; return to shader part epilog
;
; GFX90A-LABEL: atomic_xor_i32_1d:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b32 s0, s2
; GFX90A-NEXT: s_mov_b32 s1, s3
; GFX90A-NEXT: s_mov_b32 s2, s4
; GFX90A-NEXT: s_mov_b32 s3, s5
; GFX90A-NEXT: s_mov_b32 s4, s6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_xor_i32_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: atomic_xor_i32_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_inc_i32_1d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: atomic_inc_i32_1d:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
;
GFX8-NEXT: s_mov_b32 s4, s6 942; GFX8-NEXT: s_mov_b32 s5, s7 943; GFX8-NEXT: s_mov_b32 s6, s8 944; GFX8-NEXT: s_mov_b32 s7, s9 945; GFX8-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc 946; GFX8-NEXT: s_waitcnt vmcnt(0) 947; GFX8-NEXT: ; return to shader part epilog 948; 949; GFX900-LABEL: atomic_inc_i32_1d: 950; GFX900: ; %bb.0: ; %main_body 951; GFX900-NEXT: s_mov_b32 s0, s2 952; GFX900-NEXT: s_mov_b32 s1, s3 953; GFX900-NEXT: s_mov_b32 s2, s4 954; GFX900-NEXT: s_mov_b32 s3, s5 955; GFX900-NEXT: s_mov_b32 s4, s6 956; GFX900-NEXT: s_mov_b32 s5, s7 957; GFX900-NEXT: s_mov_b32 s6, s8 958; GFX900-NEXT: s_mov_b32 s7, s9 959; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc 960; GFX900-NEXT: s_waitcnt vmcnt(0) 961; GFX900-NEXT: ; return to shader part epilog 962; 963; GFX90A-LABEL: atomic_inc_i32_1d: 964; GFX90A: ; %bb.0: ; %main_body 965; GFX90A-NEXT: s_mov_b32 s0, s2 966; GFX90A-NEXT: s_mov_b32 s1, s3 967; GFX90A-NEXT: s_mov_b32 s2, s4 968; GFX90A-NEXT: s_mov_b32 s3, s5 969; GFX90A-NEXT: s_mov_b32 s4, s6 970; GFX90A-NEXT: s_mov_b32 s5, s7 971; GFX90A-NEXT: s_mov_b32 s6, s8 972; GFX90A-NEXT: s_mov_b32 s7, s9 973; GFX90A-NEXT: v_mov_b32_e32 v2, v1 974; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc 975; GFX90A-NEXT: s_waitcnt vmcnt(0) 976; GFX90A-NEXT: ; return to shader part epilog 977; 978; GFX10PLUS-LABEL: atomic_inc_i32_1d: 979; GFX10PLUS: ; %bb.0: ; %main_body 980; GFX10PLUS-NEXT: s_mov_b32 s0, s2 981; GFX10PLUS-NEXT: s_mov_b32 s1, s3 982; GFX10PLUS-NEXT: s_mov_b32 s2, s4 983; GFX10PLUS-NEXT: s_mov_b32 s3, s5 984; GFX10PLUS-NEXT: s_mov_b32 s4, s6 985; GFX10PLUS-NEXT: s_mov_b32 s5, s7 986; GFX10PLUS-NEXT: s_mov_b32 s6, s8 987; GFX10PLUS-NEXT: s_mov_b32 s7, s9 988; GFX10PLUS-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc 989; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 990; GFX10PLUS-NEXT: ; return to shader part epilog 991; 992; GFX12-LABEL: atomic_inc_i32_1d: 993; GFX12: ; %bb.0: ; %main_body 994; 
GFX12-NEXT: s_mov_b32 s0, s2 995; GFX12-NEXT: s_mov_b32 s1, s3 996; GFX12-NEXT: s_mov_b32 s2, s4 997; GFX12-NEXT: s_mov_b32 s3, s5 998; GFX12-NEXT: s_mov_b32 s4, s6 999; GFX12-NEXT: s_mov_b32 s5, s7 1000; GFX12-NEXT: s_mov_b32 s6, s8 1001; GFX12-NEXT: s_mov_b32 s7, s9 1002; GFX12-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 1003; GFX12-NEXT: s_wait_loadcnt 0x0 1004; GFX12-NEXT: ; return to shader part epilog 1005main_body: 1006 %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 1007 %out = bitcast i32 %v to float 1008 ret float %out 1009} 1010 1011define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { 1012; GFX6-LABEL: atomic_dec_i32_1d: 1013; GFX6: ; %bb.0: ; %main_body 1014; GFX6-NEXT: s_mov_b32 s0, s2 1015; GFX6-NEXT: s_mov_b32 s1, s3 1016; GFX6-NEXT: s_mov_b32 s2, s4 1017; GFX6-NEXT: s_mov_b32 s3, s5 1018; GFX6-NEXT: s_mov_b32 s4, s6 1019; GFX6-NEXT: s_mov_b32 s5, s7 1020; GFX6-NEXT: s_mov_b32 s6, s8 1021; GFX6-NEXT: s_mov_b32 s7, s9 1022; GFX6-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc 1023; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1024; GFX6-NEXT: ; return to shader part epilog 1025; 1026; GFX8-LABEL: atomic_dec_i32_1d: 1027; GFX8: ; %bb.0: ; %main_body 1028; GFX8-NEXT: s_mov_b32 s0, s2 1029; GFX8-NEXT: s_mov_b32 s1, s3 1030; GFX8-NEXT: s_mov_b32 s2, s4 1031; GFX8-NEXT: s_mov_b32 s3, s5 1032; GFX8-NEXT: s_mov_b32 s4, s6 1033; GFX8-NEXT: s_mov_b32 s5, s7 1034; GFX8-NEXT: s_mov_b32 s6, s8 1035; GFX8-NEXT: s_mov_b32 s7, s9 1036; GFX8-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc 1037; GFX8-NEXT: s_waitcnt vmcnt(0) 1038; GFX8-NEXT: ; return to shader part epilog 1039; 1040; GFX900-LABEL: atomic_dec_i32_1d: 1041; GFX900: ; %bb.0: ; %main_body 1042; GFX900-NEXT: s_mov_b32 s0, s2 1043; GFX900-NEXT: s_mov_b32 s1, s3 1044; GFX900-NEXT: s_mov_b32 s2, s4 1045; GFX900-NEXT: s_mov_b32 s3, s5 1046; GFX900-NEXT: 
s_mov_b32 s4, s6 1047; GFX900-NEXT: s_mov_b32 s5, s7 1048; GFX900-NEXT: s_mov_b32 s6, s8 1049; GFX900-NEXT: s_mov_b32 s7, s9 1050; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc 1051; GFX900-NEXT: s_waitcnt vmcnt(0) 1052; GFX900-NEXT: ; return to shader part epilog 1053; 1054; GFX90A-LABEL: atomic_dec_i32_1d: 1055; GFX90A: ; %bb.0: ; %main_body 1056; GFX90A-NEXT: s_mov_b32 s0, s2 1057; GFX90A-NEXT: s_mov_b32 s1, s3 1058; GFX90A-NEXT: s_mov_b32 s2, s4 1059; GFX90A-NEXT: s_mov_b32 s3, s5 1060; GFX90A-NEXT: s_mov_b32 s4, s6 1061; GFX90A-NEXT: s_mov_b32 s5, s7 1062; GFX90A-NEXT: s_mov_b32 s6, s8 1063; GFX90A-NEXT: s_mov_b32 s7, s9 1064; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1065; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc 1066; GFX90A-NEXT: s_waitcnt vmcnt(0) 1067; GFX90A-NEXT: ; return to shader part epilog 1068; 1069; GFX10PLUS-LABEL: atomic_dec_i32_1d: 1070; GFX10PLUS: ; %bb.0: ; %main_body 1071; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1072; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1073; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1074; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1075; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1076; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1077; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1078; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1079; GFX10PLUS-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc 1080; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1081; GFX10PLUS-NEXT: ; return to shader part epilog 1082; 1083; GFX12-LABEL: atomic_dec_i32_1d: 1084; GFX12: ; %bb.0: ; %main_body 1085; GFX12-NEXT: s_mov_b32 s0, s2 1086; GFX12-NEXT: s_mov_b32 s1, s3 1087; GFX12-NEXT: s_mov_b32 s2, s4 1088; GFX12-NEXT: s_mov_b32 s3, s5 1089; GFX12-NEXT: s_mov_b32 s4, s6 1090; GFX12-NEXT: s_mov_b32 s5, s7 1091; GFX12-NEXT: s_mov_b32 s6, s8 1092; GFX12-NEXT: s_mov_b32 s7, s9 1093; GFX12-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 1094; GFX12-NEXT: s_wait_loadcnt 0x0 1095; GFX12-NEXT: ; return to shader part epilog 
1096main_body: 1097 %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 1098 %out = bitcast i32 %v to float 1099 ret float %out 1100} 1101 1102define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { 1103; GFX6-LABEL: atomic_cmpswap_i32_1d: 1104; GFX6: ; %bb.0: ; %main_body 1105; GFX6-NEXT: s_mov_b32 s0, s2 1106; GFX6-NEXT: s_mov_b32 s1, s3 1107; GFX6-NEXT: s_mov_b32 s2, s4 1108; GFX6-NEXT: s_mov_b32 s3, s5 1109; GFX6-NEXT: s_mov_b32 s4, s6 1110; GFX6-NEXT: s_mov_b32 s5, s7 1111; GFX6-NEXT: s_mov_b32 s6, s8 1112; GFX6-NEXT: s_mov_b32 s7, s9 1113; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1114; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1115; GFX6-NEXT: ; return to shader part epilog 1116; 1117; GFX8-LABEL: atomic_cmpswap_i32_1d: 1118; GFX8: ; %bb.0: ; %main_body 1119; GFX8-NEXT: s_mov_b32 s0, s2 1120; GFX8-NEXT: s_mov_b32 s1, s3 1121; GFX8-NEXT: s_mov_b32 s2, s4 1122; GFX8-NEXT: s_mov_b32 s3, s5 1123; GFX8-NEXT: s_mov_b32 s4, s6 1124; GFX8-NEXT: s_mov_b32 s5, s7 1125; GFX8-NEXT: s_mov_b32 s6, s8 1126; GFX8-NEXT: s_mov_b32 s7, s9 1127; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1128; GFX8-NEXT: s_waitcnt vmcnt(0) 1129; GFX8-NEXT: ; return to shader part epilog 1130; 1131; GFX900-LABEL: atomic_cmpswap_i32_1d: 1132; GFX900: ; %bb.0: ; %main_body 1133; GFX900-NEXT: s_mov_b32 s0, s2 1134; GFX900-NEXT: s_mov_b32 s1, s3 1135; GFX900-NEXT: s_mov_b32 s2, s4 1136; GFX900-NEXT: s_mov_b32 s3, s5 1137; GFX900-NEXT: s_mov_b32 s4, s6 1138; GFX900-NEXT: s_mov_b32 s5, s7 1139; GFX900-NEXT: s_mov_b32 s6, s8 1140; GFX900-NEXT: s_mov_b32 s7, s9 1141; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1142; GFX900-NEXT: s_waitcnt vmcnt(0) 1143; GFX900-NEXT: ; return to shader part epilog 1144; 1145; GFX90A-LABEL: atomic_cmpswap_i32_1d: 1146; GFX90A: ; %bb.0: ; %main_body 1147; GFX90A-NEXT: s_mov_b32 s0, s2 1148; 
GFX90A-NEXT: s_mov_b32 s1, s3 1149; GFX90A-NEXT: s_mov_b32 s2, s4 1150; GFX90A-NEXT: s_mov_b32 s3, s5 1151; GFX90A-NEXT: s_mov_b32 s4, s6 1152; GFX90A-NEXT: s_mov_b32 s5, s7 1153; GFX90A-NEXT: s_mov_b32 s6, s8 1154; GFX90A-NEXT: s_mov_b32 s7, s9 1155; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1156; GFX90A-NEXT: s_waitcnt vmcnt(0) 1157; GFX90A-NEXT: ; return to shader part epilog 1158; 1159; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d: 1160; GFX10PLUS: ; %bb.0: ; %main_body 1161; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1162; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1163; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1164; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1165; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1166; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1167; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1168; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1169; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 1170; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1171; GFX10PLUS-NEXT: ; return to shader part epilog 1172; 1173; GFX12-LABEL: atomic_cmpswap_i32_1d: 1174; GFX12: ; %bb.0: ; %main_body 1175; GFX12-NEXT: s_mov_b32 s0, s2 1176; GFX12-NEXT: s_mov_b32 s1, s3 1177; GFX12-NEXT: s_mov_b32 s2, s4 1178; GFX12-NEXT: s_mov_b32 s3, s5 1179; GFX12-NEXT: s_mov_b32 s4, s6 1180; GFX12-NEXT: s_mov_b32 s5, s7 1181; GFX12-NEXT: s_mov_b32 s6, s8 1182; GFX12-NEXT: s_mov_b32 s7, s9 1183; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 1184; GFX12-NEXT: s_wait_loadcnt 0x0 1185; GFX12-NEXT: ; return to shader part epilog 1186main_body: 1187 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 1188 %out = bitcast i32 %v to float 1189 ret float %out 1190} 1191 1192define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { 1193; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return: 1194; GFX6: ; %bb.0: ; %main_body 1195; GFX6-NEXT: s_mov_b32 
s0, s2 1196; GFX6-NEXT: s_mov_b32 s1, s3 1197; GFX6-NEXT: s_mov_b32 s2, s4 1198; GFX6-NEXT: s_mov_b32 s3, s5 1199; GFX6-NEXT: s_mov_b32 s4, s6 1200; GFX6-NEXT: s_mov_b32 s5, s7 1201; GFX6-NEXT: s_mov_b32 s6, s8 1202; GFX6-NEXT: s_mov_b32 s7, s9 1203; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1204; GFX6-NEXT: s_endpgm 1205; 1206; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return: 1207; GFX8: ; %bb.0: ; %main_body 1208; GFX8-NEXT: s_mov_b32 s0, s2 1209; GFX8-NEXT: s_mov_b32 s1, s3 1210; GFX8-NEXT: s_mov_b32 s2, s4 1211; GFX8-NEXT: s_mov_b32 s3, s5 1212; GFX8-NEXT: s_mov_b32 s4, s6 1213; GFX8-NEXT: s_mov_b32 s5, s7 1214; GFX8-NEXT: s_mov_b32 s6, s8 1215; GFX8-NEXT: s_mov_b32 s7, s9 1216; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1217; GFX8-NEXT: s_endpgm 1218; 1219; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return: 1220; GFX900: ; %bb.0: ; %main_body 1221; GFX900-NEXT: s_mov_b32 s0, s2 1222; GFX900-NEXT: s_mov_b32 s1, s3 1223; GFX900-NEXT: s_mov_b32 s2, s4 1224; GFX900-NEXT: s_mov_b32 s3, s5 1225; GFX900-NEXT: s_mov_b32 s4, s6 1226; GFX900-NEXT: s_mov_b32 s5, s7 1227; GFX900-NEXT: s_mov_b32 s6, s8 1228; GFX900-NEXT: s_mov_b32 s7, s9 1229; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1230; GFX900-NEXT: s_endpgm 1231; 1232; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return: 1233; GFX90A: ; %bb.0: ; %main_body 1234; GFX90A-NEXT: s_mov_b32 s0, s2 1235; GFX90A-NEXT: s_mov_b32 s1, s3 1236; GFX90A-NEXT: s_mov_b32 s2, s4 1237; GFX90A-NEXT: s_mov_b32 s3, s5 1238; GFX90A-NEXT: s_mov_b32 s4, s6 1239; GFX90A-NEXT: s_mov_b32 s5, s7 1240; GFX90A-NEXT: s_mov_b32 s6, s8 1241; GFX90A-NEXT: s_mov_b32 s7, s9 1242; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 1243; GFX90A-NEXT: s_endpgm 1244; 1245; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return: 1246; GFX10PLUS: ; %bb.0: ; %main_body 1247; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1248; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1249; GFX10PLUS-NEXT: 
s_mov_b32 s2, s4 1250; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1251; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1252; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1253; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1254; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1255; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 1256; GFX10PLUS-NEXT: s_endpgm 1257; 1258; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return: 1259; GFX12: ; %bb.0: ; %main_body 1260; GFX12-NEXT: s_mov_b32 s0, s2 1261; GFX12-NEXT: s_mov_b32 s1, s3 1262; GFX12-NEXT: s_mov_b32 s2, s4 1263; GFX12-NEXT: s_mov_b32 s3, s5 1264; GFX12-NEXT: s_mov_b32 s4, s6 1265; GFX12-NEXT: s_mov_b32 s5, s7 1266; GFX12-NEXT: s_mov_b32 s6, s8 1267; GFX12-NEXT: s_mov_b32 s7, s9 1268; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 1269; GFX12-NEXT: s_endpgm 1270main_body: 1271 %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 1272 ret void 1273} 1274 1275define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { 1276; GFX6-LABEL: atomic_add_i32_2d: 1277; GFX6: ; %bb.0: ; %main_body 1278; GFX6-NEXT: s_mov_b32 s0, s2 1279; GFX6-NEXT: s_mov_b32 s1, s3 1280; GFX6-NEXT: s_mov_b32 s2, s4 1281; GFX6-NEXT: s_mov_b32 s3, s5 1282; GFX6-NEXT: s_mov_b32 s4, s6 1283; GFX6-NEXT: s_mov_b32 s5, s7 1284; GFX6-NEXT: s_mov_b32 s6, s8 1285; GFX6-NEXT: s_mov_b32 s7, s9 1286; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc 1287; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1288; GFX6-NEXT: ; return to shader part epilog 1289; 1290; GFX8-LABEL: atomic_add_i32_2d: 1291; GFX8: ; %bb.0: ; %main_body 1292; GFX8-NEXT: s_mov_b32 s0, s2 1293; GFX8-NEXT: s_mov_b32 s1, s3 1294; GFX8-NEXT: s_mov_b32 s2, s4 1295; GFX8-NEXT: s_mov_b32 s3, s5 1296; GFX8-NEXT: s_mov_b32 s4, s6 1297; GFX8-NEXT: s_mov_b32 s5, s7 1298; GFX8-NEXT: s_mov_b32 s6, s8 1299; GFX8-NEXT: s_mov_b32 s7, s9 1300; GFX8-NEXT: 
image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc 1301; GFX8-NEXT: s_waitcnt vmcnt(0) 1302; GFX8-NEXT: ; return to shader part epilog 1303; 1304; GFX900-LABEL: atomic_add_i32_2d: 1305; GFX900: ; %bb.0: ; %main_body 1306; GFX900-NEXT: s_mov_b32 s0, s2 1307; GFX900-NEXT: s_mov_b32 s1, s3 1308; GFX900-NEXT: s_mov_b32 s2, s4 1309; GFX900-NEXT: s_mov_b32 s3, s5 1310; GFX900-NEXT: s_mov_b32 s4, s6 1311; GFX900-NEXT: s_mov_b32 s5, s7 1312; GFX900-NEXT: s_mov_b32 s6, s8 1313; GFX900-NEXT: s_mov_b32 s7, s9 1314; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc 1315; GFX900-NEXT: s_waitcnt vmcnt(0) 1316; GFX900-NEXT: ; return to shader part epilog 1317; 1318; GFX90A-LABEL: atomic_add_i32_2d: 1319; GFX90A: ; %bb.0: ; %main_body 1320; GFX90A-NEXT: s_mov_b32 s0, s2 1321; GFX90A-NEXT: s_mov_b32 s1, s3 1322; GFX90A-NEXT: s_mov_b32 s2, s4 1323; GFX90A-NEXT: s_mov_b32 s3, s5 1324; GFX90A-NEXT: s_mov_b32 s4, s6 1325; GFX90A-NEXT: s_mov_b32 s5, s7 1326; GFX90A-NEXT: s_mov_b32 s6, s8 1327; GFX90A-NEXT: s_mov_b32 s7, s9 1328; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1329; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1330; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc 1331; GFX90A-NEXT: s_waitcnt vmcnt(0) 1332; GFX90A-NEXT: ; return to shader part epilog 1333; 1334; GFX10PLUS-LABEL: atomic_add_i32_2d: 1335; GFX10PLUS: ; %bb.0: ; %main_body 1336; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1337; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1338; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1339; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1340; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1341; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1342; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1343; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1344; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc 1345; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1346; GFX10PLUS-NEXT: ; return to shader part epilog 1347; 1348; GFX12-LABEL: atomic_add_i32_2d: 1349; GFX12: ; %bb.0: ; %main_body 1350; GFX12-NEXT: s_mov_b32 s0, s2 1351; 
GFX12-NEXT: s_mov_b32 s1, s3 1352; GFX12-NEXT: s_mov_b32 s2, s4 1353; GFX12-NEXT: s_mov_b32 s3, s5 1354; GFX12-NEXT: s_mov_b32 s4, s6 1355; GFX12-NEXT: s_mov_b32 s5, s7 1356; GFX12-NEXT: s_mov_b32 s6, s8 1357; GFX12-NEXT: s_mov_b32 s7, s9 1358; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN 1359; GFX12-NEXT: s_wait_loadcnt 0x0 1360; GFX12-NEXT: ; return to shader part epilog 1361main_body: 1362 %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 1363 %out = bitcast i32 %v to float 1364 ret float %out 1365} 1366 1367define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { 1368; GFX6-LABEL: atomic_add_i32_3d: 1369; GFX6: ; %bb.0: ; %main_body 1370; GFX6-NEXT: s_mov_b32 s0, s2 1371; GFX6-NEXT: s_mov_b32 s1, s3 1372; GFX6-NEXT: s_mov_b32 s2, s4 1373; GFX6-NEXT: s_mov_b32 s3, s5 1374; GFX6-NEXT: s_mov_b32 s4, s6 1375; GFX6-NEXT: s_mov_b32 s5, s7 1376; GFX6-NEXT: s_mov_b32 s6, s8 1377; GFX6-NEXT: s_mov_b32 s7, s9 1378; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1379; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1380; GFX6-NEXT: ; return to shader part epilog 1381; 1382; GFX8-LABEL: atomic_add_i32_3d: 1383; GFX8: ; %bb.0: ; %main_body 1384; GFX8-NEXT: s_mov_b32 s0, s2 1385; GFX8-NEXT: s_mov_b32 s1, s3 1386; GFX8-NEXT: s_mov_b32 s2, s4 1387; GFX8-NEXT: s_mov_b32 s3, s5 1388; GFX8-NEXT: s_mov_b32 s4, s6 1389; GFX8-NEXT: s_mov_b32 s5, s7 1390; GFX8-NEXT: s_mov_b32 s6, s8 1391; GFX8-NEXT: s_mov_b32 s7, s9 1392; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1393; GFX8-NEXT: s_waitcnt vmcnt(0) 1394; GFX8-NEXT: ; return to shader part epilog 1395; 1396; GFX900-LABEL: atomic_add_i32_3d: 1397; GFX900: ; %bb.0: ; %main_body 1398; GFX900-NEXT: s_mov_b32 s0, s2 1399; GFX900-NEXT: s_mov_b32 s1, s3 1400; GFX900-NEXT: s_mov_b32 s2, s4 1401; GFX900-NEXT: s_mov_b32 s3, s5 1402; 
GFX900-NEXT: s_mov_b32 s4, s6 1403; GFX900-NEXT: s_mov_b32 s5, s7 1404; GFX900-NEXT: s_mov_b32 s6, s8 1405; GFX900-NEXT: s_mov_b32 s7, s9 1406; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1407; GFX900-NEXT: s_waitcnt vmcnt(0) 1408; GFX900-NEXT: ; return to shader part epilog 1409; 1410; GFX90A-LABEL: atomic_add_i32_3d: 1411; GFX90A: ; %bb.0: ; %main_body 1412; GFX90A-NEXT: s_mov_b32 s0, s2 1413; GFX90A-NEXT: s_mov_b32 s1, s3 1414; GFX90A-NEXT: s_mov_b32 s2, s4 1415; GFX90A-NEXT: s_mov_b32 s3, s5 1416; GFX90A-NEXT: s_mov_b32 s4, s6 1417; GFX90A-NEXT: s_mov_b32 s5, s7 1418; GFX90A-NEXT: s_mov_b32 s6, s8 1419; GFX90A-NEXT: s_mov_b32 s7, s9 1420; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1421; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1422; GFX90A-NEXT: v_mov_b32_e32 v6, v3 1423; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc 1424; GFX90A-NEXT: s_waitcnt vmcnt(0) 1425; GFX90A-NEXT: ; return to shader part epilog 1426; 1427; GFX10PLUS-LABEL: atomic_add_i32_3d: 1428; GFX10PLUS: ; %bb.0: ; %main_body 1429; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1430; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1431; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1432; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1433; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1434; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1435; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1436; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1437; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc 1438; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1439; GFX10PLUS-NEXT: ; return to shader part epilog 1440; 1441; GFX12-LABEL: atomic_add_i32_3d: 1442; GFX12: ; %bb.0: ; %main_body 1443; GFX12-NEXT: s_mov_b32 s0, s2 1444; GFX12-NEXT: s_mov_b32 s1, s3 1445; GFX12-NEXT: s_mov_b32 s2, s4 1446; GFX12-NEXT: s_mov_b32 s3, s5 1447; GFX12-NEXT: s_mov_b32 s4, s6 1448; GFX12-NEXT: s_mov_b32 s5, s7 1449; GFX12-NEXT: s_mov_b32 s6, s8 1450; GFX12-NEXT: s_mov_b32 s7, s9 1451; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D 
th:TH_ATOMIC_RETURN 1452; GFX12-NEXT: s_wait_loadcnt 0x0 1453; GFX12-NEXT: ; return to shader part epilog 1454main_body: 1455 %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) 1456 %out = bitcast i32 %v to float 1457 ret float %out 1458} 1459 1460define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { 1461; GFX6-LABEL: atomic_add_i32_cube: 1462; GFX6: ; %bb.0: ; %main_body 1463; GFX6-NEXT: s_mov_b32 s0, s2 1464; GFX6-NEXT: s_mov_b32 s1, s3 1465; GFX6-NEXT: s_mov_b32 s2, s4 1466; GFX6-NEXT: s_mov_b32 s3, s5 1467; GFX6-NEXT: s_mov_b32 s4, s6 1468; GFX6-NEXT: s_mov_b32 s5, s7 1469; GFX6-NEXT: s_mov_b32 s6, s8 1470; GFX6-NEXT: s_mov_b32 s7, s9 1471; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1472; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1473; GFX6-NEXT: ; return to shader part epilog 1474; 1475; GFX8-LABEL: atomic_add_i32_cube: 1476; GFX8: ; %bb.0: ; %main_body 1477; GFX8-NEXT: s_mov_b32 s0, s2 1478; GFX8-NEXT: s_mov_b32 s1, s3 1479; GFX8-NEXT: s_mov_b32 s2, s4 1480; GFX8-NEXT: s_mov_b32 s3, s5 1481; GFX8-NEXT: s_mov_b32 s4, s6 1482; GFX8-NEXT: s_mov_b32 s5, s7 1483; GFX8-NEXT: s_mov_b32 s6, s8 1484; GFX8-NEXT: s_mov_b32 s7, s9 1485; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1486; GFX8-NEXT: s_waitcnt vmcnt(0) 1487; GFX8-NEXT: ; return to shader part epilog 1488; 1489; GFX900-LABEL: atomic_add_i32_cube: 1490; GFX900: ; %bb.0: ; %main_body 1491; GFX900-NEXT: s_mov_b32 s0, s2 1492; GFX900-NEXT: s_mov_b32 s1, s3 1493; GFX900-NEXT: s_mov_b32 s2, s4 1494; GFX900-NEXT: s_mov_b32 s3, s5 1495; GFX900-NEXT: s_mov_b32 s4, s6 1496; GFX900-NEXT: s_mov_b32 s5, s7 1497; GFX900-NEXT: s_mov_b32 s6, s8 1498; GFX900-NEXT: s_mov_b32 s7, s9 1499; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1500; GFX900-NEXT: s_waitcnt vmcnt(0) 1501; GFX900-NEXT: ; return to shader part epilog 1502; 1503; 
GFX90A-LABEL: atomic_add_i32_cube: 1504; GFX90A: ; %bb.0: ; %main_body 1505; GFX90A-NEXT: s_mov_b32 s0, s2 1506; GFX90A-NEXT: s_mov_b32 s1, s3 1507; GFX90A-NEXT: s_mov_b32 s2, s4 1508; GFX90A-NEXT: s_mov_b32 s3, s5 1509; GFX90A-NEXT: s_mov_b32 s4, s6 1510; GFX90A-NEXT: s_mov_b32 s5, s7 1511; GFX90A-NEXT: s_mov_b32 s6, s8 1512; GFX90A-NEXT: s_mov_b32 s7, s9 1513; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1514; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1515; GFX90A-NEXT: v_mov_b32_e32 v6, v3 1516; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da 1517; GFX90A-NEXT: s_waitcnt vmcnt(0) 1518; GFX90A-NEXT: ; return to shader part epilog 1519; 1520; GFX10PLUS-LABEL: atomic_add_i32_cube: 1521; GFX10PLUS: ; %bb.0: ; %main_body 1522; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1523; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1524; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1525; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1526; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1527; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1528; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1529; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1530; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc 1531; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1532; GFX10PLUS-NEXT: ; return to shader part epilog 1533; 1534; GFX12-LABEL: atomic_add_i32_cube: 1535; GFX12: ; %bb.0: ; %main_body 1536; GFX12-NEXT: s_mov_b32 s0, s2 1537; GFX12-NEXT: s_mov_b32 s1, s3 1538; GFX12-NEXT: s_mov_b32 s2, s4 1539; GFX12-NEXT: s_mov_b32 s3, s5 1540; GFX12-NEXT: s_mov_b32 s4, s6 1541; GFX12-NEXT: s_mov_b32 s5, s7 1542; GFX12-NEXT: s_mov_b32 s6, s8 1543; GFX12-NEXT: s_mov_b32 s7, s9 1544; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN 1545; GFX12-NEXT: s_wait_loadcnt 0x0 1546; GFX12-NEXT: ; return to shader part epilog 1547main_body: 1548 %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0) 1549 %out = bitcast i32 %v to float 1550 ret float 
%out 1551} 1552 1553define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { 1554; GFX6-LABEL: atomic_add_i32_1darray: 1555; GFX6: ; %bb.0: ; %main_body 1556; GFX6-NEXT: s_mov_b32 s0, s2 1557; GFX6-NEXT: s_mov_b32 s1, s3 1558; GFX6-NEXT: s_mov_b32 s2, s4 1559; GFX6-NEXT: s_mov_b32 s3, s5 1560; GFX6-NEXT: s_mov_b32 s4, s6 1561; GFX6-NEXT: s_mov_b32 s5, s7 1562; GFX6-NEXT: s_mov_b32 s6, s8 1563; GFX6-NEXT: s_mov_b32 s7, s9 1564; GFX6-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da 1565; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1566; GFX6-NEXT: ; return to shader part epilog 1567; 1568; GFX8-LABEL: atomic_add_i32_1darray: 1569; GFX8: ; %bb.0: ; %main_body 1570; GFX8-NEXT: s_mov_b32 s0, s2 1571; GFX8-NEXT: s_mov_b32 s1, s3 1572; GFX8-NEXT: s_mov_b32 s2, s4 1573; GFX8-NEXT: s_mov_b32 s3, s5 1574; GFX8-NEXT: s_mov_b32 s4, s6 1575; GFX8-NEXT: s_mov_b32 s5, s7 1576; GFX8-NEXT: s_mov_b32 s6, s8 1577; GFX8-NEXT: s_mov_b32 s7, s9 1578; GFX8-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da 1579; GFX8-NEXT: s_waitcnt vmcnt(0) 1580; GFX8-NEXT: ; return to shader part epilog 1581; 1582; GFX900-LABEL: atomic_add_i32_1darray: 1583; GFX900: ; %bb.0: ; %main_body 1584; GFX900-NEXT: s_mov_b32 s0, s2 1585; GFX900-NEXT: s_mov_b32 s1, s3 1586; GFX900-NEXT: s_mov_b32 s2, s4 1587; GFX900-NEXT: s_mov_b32 s3, s5 1588; GFX900-NEXT: s_mov_b32 s4, s6 1589; GFX900-NEXT: s_mov_b32 s5, s7 1590; GFX900-NEXT: s_mov_b32 s6, s8 1591; GFX900-NEXT: s_mov_b32 s7, s9 1592; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da 1593; GFX900-NEXT: s_waitcnt vmcnt(0) 1594; GFX900-NEXT: ; return to shader part epilog 1595; 1596; GFX90A-LABEL: atomic_add_i32_1darray: 1597; GFX90A: ; %bb.0: ; %main_body 1598; GFX90A-NEXT: s_mov_b32 s0, s2 1599; GFX90A-NEXT: s_mov_b32 s1, s3 1600; GFX90A-NEXT: s_mov_b32 s2, s4 1601; GFX90A-NEXT: s_mov_b32 s3, s5 1602; GFX90A-NEXT: s_mov_b32 s4, s6 1603; GFX90A-NEXT: s_mov_b32 s5, s7 
1604; GFX90A-NEXT: s_mov_b32 s6, s8 1605; GFX90A-NEXT: s_mov_b32 s7, s9 1606; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1607; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1608; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da 1609; GFX90A-NEXT: s_waitcnt vmcnt(0) 1610; GFX90A-NEXT: ; return to shader part epilog 1611; 1612; GFX10PLUS-LABEL: atomic_add_i32_1darray: 1613; GFX10PLUS: ; %bb.0: ; %main_body 1614; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1615; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1616; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1617; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1618; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1619; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1620; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1621; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1622; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc 1623; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1624; GFX10PLUS-NEXT: ; return to shader part epilog 1625; 1626; GFX12-LABEL: atomic_add_i32_1darray: 1627; GFX12: ; %bb.0: ; %main_body 1628; GFX12-NEXT: s_mov_b32 s0, s2 1629; GFX12-NEXT: s_mov_b32 s1, s3 1630; GFX12-NEXT: s_mov_b32 s2, s4 1631; GFX12-NEXT: s_mov_b32 s3, s5 1632; GFX12-NEXT: s_mov_b32 s4, s6 1633; GFX12-NEXT: s_mov_b32 s5, s7 1634; GFX12-NEXT: s_mov_b32 s6, s8 1635; GFX12-NEXT: s_mov_b32 s7, s9 1636; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN 1637; GFX12-NEXT: s_wait_loadcnt 0x0 1638; GFX12-NEXT: ; return to shader part epilog 1639main_body: 1640 %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1641 %out = bitcast i32 %v to float 1642 ret float %out 1643} 1644 1645define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { 1646; GFX6-LABEL: atomic_add_i32_2darray: 1647; GFX6: ; %bb.0: ; %main_body 1648; GFX6-NEXT: s_mov_b32 s0, s2 1649; GFX6-NEXT: s_mov_b32 s1, s3 1650; GFX6-NEXT: s_mov_b32 s2, s4 1651; 
GFX6-NEXT: s_mov_b32 s3, s5 1652; GFX6-NEXT: s_mov_b32 s4, s6 1653; GFX6-NEXT: s_mov_b32 s5, s7 1654; GFX6-NEXT: s_mov_b32 s6, s8 1655; GFX6-NEXT: s_mov_b32 s7, s9 1656; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1657; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1658; GFX6-NEXT: ; return to shader part epilog 1659; 1660; GFX8-LABEL: atomic_add_i32_2darray: 1661; GFX8: ; %bb.0: ; %main_body 1662; GFX8-NEXT: s_mov_b32 s0, s2 1663; GFX8-NEXT: s_mov_b32 s1, s3 1664; GFX8-NEXT: s_mov_b32 s2, s4 1665; GFX8-NEXT: s_mov_b32 s3, s5 1666; GFX8-NEXT: s_mov_b32 s4, s6 1667; GFX8-NEXT: s_mov_b32 s5, s7 1668; GFX8-NEXT: s_mov_b32 s6, s8 1669; GFX8-NEXT: s_mov_b32 s7, s9 1670; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1671; GFX8-NEXT: s_waitcnt vmcnt(0) 1672; GFX8-NEXT: ; return to shader part epilog 1673; 1674; GFX900-LABEL: atomic_add_i32_2darray: 1675; GFX900: ; %bb.0: ; %main_body 1676; GFX900-NEXT: s_mov_b32 s0, s2 1677; GFX900-NEXT: s_mov_b32 s1, s3 1678; GFX900-NEXT: s_mov_b32 s2, s4 1679; GFX900-NEXT: s_mov_b32 s3, s5 1680; GFX900-NEXT: s_mov_b32 s4, s6 1681; GFX900-NEXT: s_mov_b32 s5, s7 1682; GFX900-NEXT: s_mov_b32 s6, s8 1683; GFX900-NEXT: s_mov_b32 s7, s9 1684; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da 1685; GFX900-NEXT: s_waitcnt vmcnt(0) 1686; GFX900-NEXT: ; return to shader part epilog 1687; 1688; GFX90A-LABEL: atomic_add_i32_2darray: 1689; GFX90A: ; %bb.0: ; %main_body 1690; GFX90A-NEXT: s_mov_b32 s0, s2 1691; GFX90A-NEXT: s_mov_b32 s1, s3 1692; GFX90A-NEXT: s_mov_b32 s2, s4 1693; GFX90A-NEXT: s_mov_b32 s3, s5 1694; GFX90A-NEXT: s_mov_b32 s4, s6 1695; GFX90A-NEXT: s_mov_b32 s5, s7 1696; GFX90A-NEXT: s_mov_b32 s6, s8 1697; GFX90A-NEXT: s_mov_b32 s7, s9 1698; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1699; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1700; GFX90A-NEXT: v_mov_b32_e32 v6, v3 1701; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da 1702; GFX90A-NEXT: s_waitcnt vmcnt(0) 
1703; GFX90A-NEXT: ; return to shader part epilog 1704; 1705; GFX10PLUS-LABEL: atomic_add_i32_2darray: 1706; GFX10PLUS: ; %bb.0: ; %main_body 1707; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1708; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1709; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1710; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1711; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1712; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1713; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1714; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1715; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc 1716; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1717; GFX10PLUS-NEXT: ; return to shader part epilog 1718; 1719; GFX12-LABEL: atomic_add_i32_2darray: 1720; GFX12: ; %bb.0: ; %main_body 1721; GFX12-NEXT: s_mov_b32 s0, s2 1722; GFX12-NEXT: s_mov_b32 s1, s3 1723; GFX12-NEXT: s_mov_b32 s2, s4 1724; GFX12-NEXT: s_mov_b32 s3, s5 1725; GFX12-NEXT: s_mov_b32 s4, s6 1726; GFX12-NEXT: s_mov_b32 s5, s7 1727; GFX12-NEXT: s_mov_b32 s6, s8 1728; GFX12-NEXT: s_mov_b32 s7, s9 1729; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN 1730; GFX12-NEXT: s_wait_loadcnt 0x0 1731; GFX12-NEXT: ; return to shader part epilog 1732main_body: 1733 %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1734 %out = bitcast i32 %v to float 1735 ret float %out 1736} 1737 1738define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { 1739; GFX6-LABEL: atomic_add_i32_2dmsaa: 1740; GFX6: ; %bb.0: ; %main_body 1741; GFX6-NEXT: s_mov_b32 s0, s2 1742; GFX6-NEXT: s_mov_b32 s1, s3 1743; GFX6-NEXT: s_mov_b32 s2, s4 1744; GFX6-NEXT: s_mov_b32 s3, s5 1745; GFX6-NEXT: s_mov_b32 s4, s6 1746; GFX6-NEXT: s_mov_b32 s5, s7 1747; GFX6-NEXT: s_mov_b32 s6, s8 1748; GFX6-NEXT: s_mov_b32 s7, s9 1749; GFX6-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1750; GFX6-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) 1751; GFX6-NEXT: ; return to shader part epilog 1752; 1753; GFX8-LABEL: atomic_add_i32_2dmsaa: 1754; GFX8: ; %bb.0: ; %main_body 1755; GFX8-NEXT: s_mov_b32 s0, s2 1756; GFX8-NEXT: s_mov_b32 s1, s3 1757; GFX8-NEXT: s_mov_b32 s2, s4 1758; GFX8-NEXT: s_mov_b32 s3, s5 1759; GFX8-NEXT: s_mov_b32 s4, s6 1760; GFX8-NEXT: s_mov_b32 s5, s7 1761; GFX8-NEXT: s_mov_b32 s6, s8 1762; GFX8-NEXT: s_mov_b32 s7, s9 1763; GFX8-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1764; GFX8-NEXT: s_waitcnt vmcnt(0) 1765; GFX8-NEXT: ; return to shader part epilog 1766; 1767; GFX900-LABEL: atomic_add_i32_2dmsaa: 1768; GFX900: ; %bb.0: ; %main_body 1769; GFX900-NEXT: s_mov_b32 s0, s2 1770; GFX900-NEXT: s_mov_b32 s1, s3 1771; GFX900-NEXT: s_mov_b32 s2, s4 1772; GFX900-NEXT: s_mov_b32 s3, s5 1773; GFX900-NEXT: s_mov_b32 s4, s6 1774; GFX900-NEXT: s_mov_b32 s5, s7 1775; GFX900-NEXT: s_mov_b32 s6, s8 1776; GFX900-NEXT: s_mov_b32 s7, s9 1777; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc 1778; GFX900-NEXT: s_waitcnt vmcnt(0) 1779; GFX900-NEXT: ; return to shader part epilog 1780; 1781; GFX90A-LABEL: atomic_add_i32_2dmsaa: 1782; GFX90A: ; %bb.0: ; %main_body 1783; GFX90A-NEXT: s_mov_b32 s0, s2 1784; GFX90A-NEXT: s_mov_b32 s1, s3 1785; GFX90A-NEXT: s_mov_b32 s2, s4 1786; GFX90A-NEXT: s_mov_b32 s3, s5 1787; GFX90A-NEXT: s_mov_b32 s4, s6 1788; GFX90A-NEXT: s_mov_b32 s5, s7 1789; GFX90A-NEXT: s_mov_b32 s6, s8 1790; GFX90A-NEXT: s_mov_b32 s7, s9 1791; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1792; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1793; GFX90A-NEXT: v_mov_b32_e32 v6, v3 1794; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc 1795; GFX90A-NEXT: s_waitcnt vmcnt(0) 1796; GFX90A-NEXT: ; return to shader part epilog 1797; 1798; GFX10PLUS-LABEL: atomic_add_i32_2dmsaa: 1799; GFX10PLUS: ; %bb.0: ; %main_body 1800; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1801; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1802; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1803; GFX10PLUS-NEXT: 
s_mov_b32 s3, s5 1804; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1805; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1806; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1807; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1808; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc 1809; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1810; GFX10PLUS-NEXT: ; return to shader part epilog 1811; 1812; GFX12-LABEL: atomic_add_i32_2dmsaa: 1813; GFX12: ; %bb.0: ; %main_body 1814; GFX12-NEXT: s_mov_b32 s0, s2 1815; GFX12-NEXT: s_mov_b32 s1, s3 1816; GFX12-NEXT: s_mov_b32 s2, s4 1817; GFX12-NEXT: s_mov_b32 s3, s5 1818; GFX12-NEXT: s_mov_b32 s4, s6 1819; GFX12-NEXT: s_mov_b32 s5, s7 1820; GFX12-NEXT: s_mov_b32 s6, s8 1821; GFX12-NEXT: s_mov_b32 s7, s9 1822; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN 1823; GFX12-NEXT: s_wait_loadcnt 0x0 1824; GFX12-NEXT: ; return to shader part epilog 1825main_body: 1826 %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1827 %out = bitcast i32 %v to float 1828 ret float %out 1829} 1830 1831define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { 1832; GFX6-LABEL: atomic_add_i32_2darraymsaa: 1833; GFX6: ; %bb.0: ; %main_body 1834; GFX6-NEXT: s_mov_b32 s0, s2 1835; GFX6-NEXT: s_mov_b32 s1, s3 1836; GFX6-NEXT: s_mov_b32 s2, s4 1837; GFX6-NEXT: s_mov_b32 s3, s5 1838; GFX6-NEXT: s_mov_b32 s4, s6 1839; GFX6-NEXT: s_mov_b32 s5, s7 1840; GFX6-NEXT: s_mov_b32 s6, s8 1841; GFX6-NEXT: s_mov_b32 s7, s9 1842; GFX6-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da 1843; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1844; GFX6-NEXT: ; return to shader part epilog 1845; 1846; GFX8-LABEL: atomic_add_i32_2darraymsaa: 1847; GFX8: ; %bb.0: ; %main_body 1848; GFX8-NEXT: s_mov_b32 s0, s2 1849; GFX8-NEXT: s_mov_b32 s1, s3 1850; GFX8-NEXT: s_mov_b32 s2, s4 
1851; GFX8-NEXT: s_mov_b32 s3, s5 1852; GFX8-NEXT: s_mov_b32 s4, s6 1853; GFX8-NEXT: s_mov_b32 s5, s7 1854; GFX8-NEXT: s_mov_b32 s6, s8 1855; GFX8-NEXT: s_mov_b32 s7, s9 1856; GFX8-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da 1857; GFX8-NEXT: s_waitcnt vmcnt(0) 1858; GFX8-NEXT: ; return to shader part epilog 1859; 1860; GFX900-LABEL: atomic_add_i32_2darraymsaa: 1861; GFX900: ; %bb.0: ; %main_body 1862; GFX900-NEXT: s_mov_b32 s0, s2 1863; GFX900-NEXT: s_mov_b32 s1, s3 1864; GFX900-NEXT: s_mov_b32 s2, s4 1865; GFX900-NEXT: s_mov_b32 s3, s5 1866; GFX900-NEXT: s_mov_b32 s4, s6 1867; GFX900-NEXT: s_mov_b32 s5, s7 1868; GFX900-NEXT: s_mov_b32 s6, s8 1869; GFX900-NEXT: s_mov_b32 s7, s9 1870; GFX900-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da 1871; GFX900-NEXT: s_waitcnt vmcnt(0) 1872; GFX900-NEXT: ; return to shader part epilog 1873; 1874; GFX90A-LABEL: atomic_add_i32_2darraymsaa: 1875; GFX90A: ; %bb.0: ; %main_body 1876; GFX90A-NEXT: s_mov_b32 s0, s2 1877; GFX90A-NEXT: s_mov_b32 s1, s3 1878; GFX90A-NEXT: s_mov_b32 s2, s4 1879; GFX90A-NEXT: s_mov_b32 s3, s5 1880; GFX90A-NEXT: s_mov_b32 s4, s6 1881; GFX90A-NEXT: s_mov_b32 s5, s7 1882; GFX90A-NEXT: s_mov_b32 s6, s8 1883; GFX90A-NEXT: s_mov_b32 s7, s9 1884; GFX90A-NEXT: v_mov_b32_e32 v6, v1 1885; GFX90A-NEXT: v_mov_b32_e32 v7, v2 1886; GFX90A-NEXT: v_mov_b32_e32 v8, v3 1887; GFX90A-NEXT: v_mov_b32_e32 v9, v4 1888; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da 1889; GFX90A-NEXT: s_waitcnt vmcnt(0) 1890; GFX90A-NEXT: ; return to shader part epilog 1891; 1892; GFX10PLUS-LABEL: atomic_add_i32_2darraymsaa: 1893; GFX10PLUS: ; %bb.0: ; %main_body 1894; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1895; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1896; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1897; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1898; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1899; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1900; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1901; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1902; 
GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc 1903; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1904; GFX10PLUS-NEXT: ; return to shader part epilog 1905; 1906; GFX12-LABEL: atomic_add_i32_2darraymsaa: 1907; GFX12: ; %bb.0: ; %main_body 1908; GFX12-NEXT: s_mov_b32 s0, s2 1909; GFX12-NEXT: s_mov_b32 s1, s3 1910; GFX12-NEXT: s_mov_b32 s2, s4 1911; GFX12-NEXT: s_mov_b32 s3, s5 1912; GFX12-NEXT: s_mov_b32 s4, s6 1913; GFX12-NEXT: s_mov_b32 s5, s7 1914; GFX12-NEXT: s_mov_b32 s6, s8 1915; GFX12-NEXT: s_mov_b32 s7, s9 1916; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN 1917; GFX12-NEXT: s_wait_loadcnt 0x0 1918; GFX12-NEXT: ; return to shader part epilog 1919main_body: 1920 %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1921 %out = bitcast i32 %v to float 1922 ret float %out 1923} 1924 1925define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { 1926; GFX6-LABEL: atomic_add_i32_1d_slc: 1927; GFX6: ; %bb.0: ; %main_body 1928; GFX6-NEXT: s_mov_b32 s0, s2 1929; GFX6-NEXT: s_mov_b32 s1, s3 1930; GFX6-NEXT: s_mov_b32 s2, s4 1931; GFX6-NEXT: s_mov_b32 s3, s5 1932; GFX6-NEXT: s_mov_b32 s4, s6 1933; GFX6-NEXT: s_mov_b32 s5, s7 1934; GFX6-NEXT: s_mov_b32 s6, s8 1935; GFX6-NEXT: s_mov_b32 s7, s9 1936; GFX6-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc 1937; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1938; GFX6-NEXT: ; return to shader part epilog 1939; 1940; GFX8-LABEL: atomic_add_i32_1d_slc: 1941; GFX8: ; %bb.0: ; %main_body 1942; GFX8-NEXT: s_mov_b32 s0, s2 1943; GFX8-NEXT: s_mov_b32 s1, s3 1944; GFX8-NEXT: s_mov_b32 s2, s4 1945; GFX8-NEXT: s_mov_b32 s3, s5 1946; GFX8-NEXT: s_mov_b32 s4, s6 1947; GFX8-NEXT: s_mov_b32 s5, s7 1948; GFX8-NEXT: s_mov_b32 s6, s8 1949; GFX8-NEXT: s_mov_b32 s7, s9 1950; GFX8-NEXT: 
image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc 1951; GFX8-NEXT: s_waitcnt vmcnt(0) 1952; GFX8-NEXT: ; return to shader part epilog 1953; 1954; GFX900-LABEL: atomic_add_i32_1d_slc: 1955; GFX900: ; %bb.0: ; %main_body 1956; GFX900-NEXT: s_mov_b32 s0, s2 1957; GFX900-NEXT: s_mov_b32 s1, s3 1958; GFX900-NEXT: s_mov_b32 s2, s4 1959; GFX900-NEXT: s_mov_b32 s3, s5 1960; GFX900-NEXT: s_mov_b32 s4, s6 1961; GFX900-NEXT: s_mov_b32 s5, s7 1962; GFX900-NEXT: s_mov_b32 s6, s8 1963; GFX900-NEXT: s_mov_b32 s7, s9 1964; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc 1965; GFX900-NEXT: s_waitcnt vmcnt(0) 1966; GFX900-NEXT: ; return to shader part epilog 1967; 1968; GFX90A-LABEL: atomic_add_i32_1d_slc: 1969; GFX90A: ; %bb.0: ; %main_body 1970; GFX90A-NEXT: s_mov_b32 s0, s2 1971; GFX90A-NEXT: s_mov_b32 s1, s3 1972; GFX90A-NEXT: s_mov_b32 s2, s4 1973; GFX90A-NEXT: s_mov_b32 s3, s5 1974; GFX90A-NEXT: s_mov_b32 s4, s6 1975; GFX90A-NEXT: s_mov_b32 s5, s7 1976; GFX90A-NEXT: s_mov_b32 s6, s8 1977; GFX90A-NEXT: s_mov_b32 s7, s9 1978; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1979; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc 1980; GFX90A-NEXT: s_waitcnt vmcnt(0) 1981; GFX90A-NEXT: ; return to shader part epilog 1982; 1983; GFX10PLUS-LABEL: atomic_add_i32_1d_slc: 1984; GFX10PLUS: ; %bb.0: ; %main_body 1985; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1986; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1987; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1988; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1989; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1990; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1991; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1992; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1993; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc 1994; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1995; GFX10PLUS-NEXT: ; return to shader part epilog 1996; 1997; GFX12-LABEL: atomic_add_i32_1d_slc: 1998; GFX12: ; %bb.0: ; %main_body 1999; GFX12-NEXT: s_mov_b32 s0, s2 2000; GFX12-NEXT: s_mov_b32 s1, s3 
2001; GFX12-NEXT: s_mov_b32 s2, s4 2002; GFX12-NEXT: s_mov_b32 s3, s5 2003; GFX12-NEXT: s_mov_b32 s4, s6 2004; GFX12-NEXT: s_mov_b32 s5, s7 2005; GFX12-NEXT: s_mov_b32 s6, s8 2006; GFX12-NEXT: s_mov_b32 s7, s9 2007; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN 2008; GFX12-NEXT: s_wait_loadcnt 0x0 2009; GFX12-NEXT: ; return to shader part epilog 2010main_body: 2011 %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) 2012 %out = bitcast i32 %v to float 2013 ret float %out 2014} 2015 2016define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2017; GFX6-LABEL: atomic_swap_i64_1d: 2018; GFX6: ; %bb.0: ; %main_body 2019; GFX6-NEXT: s_mov_b32 s0, s2 2020; GFX6-NEXT: s_mov_b32 s1, s3 2021; GFX6-NEXT: s_mov_b32 s2, s4 2022; GFX6-NEXT: s_mov_b32 s3, s5 2023; GFX6-NEXT: s_mov_b32 s4, s6 2024; GFX6-NEXT: s_mov_b32 s5, s7 2025; GFX6-NEXT: s_mov_b32 s6, s8 2026; GFX6-NEXT: s_mov_b32 s7, s9 2027; GFX6-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2028; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2029; GFX6-NEXT: ; return to shader part epilog 2030; 2031; GFX8-LABEL: atomic_swap_i64_1d: 2032; GFX8: ; %bb.0: ; %main_body 2033; GFX8-NEXT: s_mov_b32 s0, s2 2034; GFX8-NEXT: s_mov_b32 s1, s3 2035; GFX8-NEXT: s_mov_b32 s2, s4 2036; GFX8-NEXT: s_mov_b32 s3, s5 2037; GFX8-NEXT: s_mov_b32 s4, s6 2038; GFX8-NEXT: s_mov_b32 s5, s7 2039; GFX8-NEXT: s_mov_b32 s6, s8 2040; GFX8-NEXT: s_mov_b32 s7, s9 2041; GFX8-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2042; GFX8-NEXT: s_waitcnt vmcnt(0) 2043; GFX8-NEXT: ; return to shader part epilog 2044; 2045; GFX900-LABEL: atomic_swap_i64_1d: 2046; GFX900: ; %bb.0: ; %main_body 2047; GFX900-NEXT: s_mov_b32 s0, s2 2048; GFX900-NEXT: s_mov_b32 s1, s3 2049; GFX900-NEXT: s_mov_b32 s2, s4 2050; GFX900-NEXT: s_mov_b32 s3, s5 2051; GFX900-NEXT: s_mov_b32 s4, s6 2052; GFX900-NEXT: 
s_mov_b32 s5, s7 2053; GFX900-NEXT: s_mov_b32 s6, s8 2054; GFX900-NEXT: s_mov_b32 s7, s9 2055; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2056; GFX900-NEXT: s_waitcnt vmcnt(0) 2057; GFX900-NEXT: ; return to shader part epilog 2058; 2059; GFX90A-LABEL: atomic_swap_i64_1d: 2060; GFX90A: ; %bb.0: ; %main_body 2061; GFX90A-NEXT: s_mov_b32 s0, s2 2062; GFX90A-NEXT: s_mov_b32 s1, s3 2063; GFX90A-NEXT: s_mov_b32 s2, s4 2064; GFX90A-NEXT: s_mov_b32 s3, s5 2065; GFX90A-NEXT: s_mov_b32 s4, s6 2066; GFX90A-NEXT: s_mov_b32 s5, s7 2067; GFX90A-NEXT: s_mov_b32 s6, s8 2068; GFX90A-NEXT: s_mov_b32 s7, s9 2069; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2070; GFX90A-NEXT: s_waitcnt vmcnt(0) 2071; GFX90A-NEXT: ; return to shader part epilog 2072; 2073; GFX10PLUS-LABEL: atomic_swap_i64_1d: 2074; GFX10PLUS: ; %bb.0: ; %main_body 2075; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2076; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2077; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2078; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2079; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2080; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2081; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2082; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2083; GFX10PLUS-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2084; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2085; GFX10PLUS-NEXT: ; return to shader part epilog 2086; 2087; GFX12-LABEL: atomic_swap_i64_1d: 2088; GFX12: ; %bb.0: ; %main_body 2089; GFX12-NEXT: s_mov_b32 s0, s2 2090; GFX12-NEXT: s_mov_b32 s1, s3 2091; GFX12-NEXT: s_mov_b32 s2, s4 2092; GFX12-NEXT: s_mov_b32 s3, s5 2093; GFX12-NEXT: s_mov_b32 s4, s6 2094; GFX12-NEXT: s_mov_b32 s5, s7 2095; GFX12-NEXT: s_mov_b32 s6, s8 2096; GFX12-NEXT: s_mov_b32 s7, s9 2097; GFX12-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2098; GFX12-NEXT: s_wait_loadcnt 0x0 2099; GFX12-NEXT: ; return to shader part epilog 2100main_body: 2101 %v = call i64 
@llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2102 %out = bitcast i64 %v to <2 x float> 2103 ret <2 x float> %out 2104} 2105 2106define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2107; GFX6-LABEL: atomic_add_i64_1d: 2108; GFX6: ; %bb.0: ; %main_body 2109; GFX6-NEXT: s_mov_b32 s0, s2 2110; GFX6-NEXT: s_mov_b32 s1, s3 2111; GFX6-NEXT: s_mov_b32 s2, s4 2112; GFX6-NEXT: s_mov_b32 s3, s5 2113; GFX6-NEXT: s_mov_b32 s4, s6 2114; GFX6-NEXT: s_mov_b32 s5, s7 2115; GFX6-NEXT: s_mov_b32 s6, s8 2116; GFX6-NEXT: s_mov_b32 s7, s9 2117; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2118; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2119; GFX6-NEXT: ; return to shader part epilog 2120; 2121; GFX8-LABEL: atomic_add_i64_1d: 2122; GFX8: ; %bb.0: ; %main_body 2123; GFX8-NEXT: s_mov_b32 s0, s2 2124; GFX8-NEXT: s_mov_b32 s1, s3 2125; GFX8-NEXT: s_mov_b32 s2, s4 2126; GFX8-NEXT: s_mov_b32 s3, s5 2127; GFX8-NEXT: s_mov_b32 s4, s6 2128; GFX8-NEXT: s_mov_b32 s5, s7 2129; GFX8-NEXT: s_mov_b32 s6, s8 2130; GFX8-NEXT: s_mov_b32 s7, s9 2131; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2132; GFX8-NEXT: s_waitcnt vmcnt(0) 2133; GFX8-NEXT: ; return to shader part epilog 2134; 2135; GFX900-LABEL: atomic_add_i64_1d: 2136; GFX900: ; %bb.0: ; %main_body 2137; GFX900-NEXT: s_mov_b32 s0, s2 2138; GFX900-NEXT: s_mov_b32 s1, s3 2139; GFX900-NEXT: s_mov_b32 s2, s4 2140; GFX900-NEXT: s_mov_b32 s3, s5 2141; GFX900-NEXT: s_mov_b32 s4, s6 2142; GFX900-NEXT: s_mov_b32 s5, s7 2143; GFX900-NEXT: s_mov_b32 s6, s8 2144; GFX900-NEXT: s_mov_b32 s7, s9 2145; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2146; GFX900-NEXT: s_waitcnt vmcnt(0) 2147; GFX900-NEXT: ; return to shader part epilog 2148; 2149; GFX90A-LABEL: atomic_add_i64_1d: 2150; GFX90A: ; %bb.0: ; %main_body 2151; GFX90A-NEXT: s_mov_b32 s0, s2 2152; GFX90A-NEXT: s_mov_b32 s1, s3 2153; GFX90A-NEXT: s_mov_b32 
s2, s4 2154; GFX90A-NEXT: s_mov_b32 s3, s5 2155; GFX90A-NEXT: s_mov_b32 s4, s6 2156; GFX90A-NEXT: s_mov_b32 s5, s7 2157; GFX90A-NEXT: s_mov_b32 s6, s8 2158; GFX90A-NEXT: s_mov_b32 s7, s9 2159; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2160; GFX90A-NEXT: s_waitcnt vmcnt(0) 2161; GFX90A-NEXT: ; return to shader part epilog 2162; 2163; GFX10PLUS-LABEL: atomic_add_i64_1d: 2164; GFX10PLUS: ; %bb.0: ; %main_body 2165; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2166; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2167; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2168; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2169; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2170; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2171; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2172; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2173; GFX10PLUS-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2174; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2175; GFX10PLUS-NEXT: ; return to shader part epilog 2176; 2177; GFX12-LABEL: atomic_add_i64_1d: 2178; GFX12: ; %bb.0: ; %main_body 2179; GFX12-NEXT: s_mov_b32 s0, s2 2180; GFX12-NEXT: s_mov_b32 s1, s3 2181; GFX12-NEXT: s_mov_b32 s2, s4 2182; GFX12-NEXT: s_mov_b32 s3, s5 2183; GFX12-NEXT: s_mov_b32 s4, s6 2184; GFX12-NEXT: s_mov_b32 s5, s7 2185; GFX12-NEXT: s_mov_b32 s6, s8 2186; GFX12-NEXT: s_mov_b32 s7, s9 2187; GFX12-NEXT: image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2188; GFX12-NEXT: s_wait_loadcnt 0x0 2189; GFX12-NEXT: ; return to shader part epilog 2190main_body: 2191 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2192 %out = bitcast i64 %v to <2 x float> 2193 ret <2 x float> %out 2194} 2195 2196define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2197; GFX6-LABEL: atomic_sub_i64_1d: 2198; GFX6: ; %bb.0: ; %main_body 2199; GFX6-NEXT: s_mov_b32 s0, s2 2200; GFX6-NEXT: s_mov_b32 s1, s3 2201; GFX6-NEXT: s_mov_b32 s2, s4 2202; GFX6-NEXT: s_mov_b32 s3, s5 
2203; GFX6-NEXT: s_mov_b32 s4, s6 2204; GFX6-NEXT: s_mov_b32 s5, s7 2205; GFX6-NEXT: s_mov_b32 s6, s8 2206; GFX6-NEXT: s_mov_b32 s7, s9 2207; GFX6-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2208; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2209; GFX6-NEXT: ; return to shader part epilog 2210; 2211; GFX8-LABEL: atomic_sub_i64_1d: 2212; GFX8: ; %bb.0: ; %main_body 2213; GFX8-NEXT: s_mov_b32 s0, s2 2214; GFX8-NEXT: s_mov_b32 s1, s3 2215; GFX8-NEXT: s_mov_b32 s2, s4 2216; GFX8-NEXT: s_mov_b32 s3, s5 2217; GFX8-NEXT: s_mov_b32 s4, s6 2218; GFX8-NEXT: s_mov_b32 s5, s7 2219; GFX8-NEXT: s_mov_b32 s6, s8 2220; GFX8-NEXT: s_mov_b32 s7, s9 2221; GFX8-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2222; GFX8-NEXT: s_waitcnt vmcnt(0) 2223; GFX8-NEXT: ; return to shader part epilog 2224; 2225; GFX900-LABEL: atomic_sub_i64_1d: 2226; GFX900: ; %bb.0: ; %main_body 2227; GFX900-NEXT: s_mov_b32 s0, s2 2228; GFX900-NEXT: s_mov_b32 s1, s3 2229; GFX900-NEXT: s_mov_b32 s2, s4 2230; GFX900-NEXT: s_mov_b32 s3, s5 2231; GFX900-NEXT: s_mov_b32 s4, s6 2232; GFX900-NEXT: s_mov_b32 s5, s7 2233; GFX900-NEXT: s_mov_b32 s6, s8 2234; GFX900-NEXT: s_mov_b32 s7, s9 2235; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2236; GFX900-NEXT: s_waitcnt vmcnt(0) 2237; GFX900-NEXT: ; return to shader part epilog 2238; 2239; GFX90A-LABEL: atomic_sub_i64_1d: 2240; GFX90A: ; %bb.0: ; %main_body 2241; GFX90A-NEXT: s_mov_b32 s0, s2 2242; GFX90A-NEXT: s_mov_b32 s1, s3 2243; GFX90A-NEXT: s_mov_b32 s2, s4 2244; GFX90A-NEXT: s_mov_b32 s3, s5 2245; GFX90A-NEXT: s_mov_b32 s4, s6 2246; GFX90A-NEXT: s_mov_b32 s5, s7 2247; GFX90A-NEXT: s_mov_b32 s6, s8 2248; GFX90A-NEXT: s_mov_b32 s7, s9 2249; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2250; GFX90A-NEXT: s_waitcnt vmcnt(0) 2251; GFX90A-NEXT: ; return to shader part epilog 2252; 2253; GFX10PLUS-LABEL: atomic_sub_i64_1d: 2254; GFX10PLUS: ; %bb.0: ; %main_body 2255; GFX10PLUS-NEXT: s_mov_b32 s0, s2 
2256; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2257; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2258; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2259; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2260; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2261; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2262; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2263; GFX10PLUS-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2264; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2265; GFX10PLUS-NEXT: ; return to shader part epilog 2266; 2267; GFX12-LABEL: atomic_sub_i64_1d: 2268; GFX12: ; %bb.0: ; %main_body 2269; GFX12-NEXT: s_mov_b32 s0, s2 2270; GFX12-NEXT: s_mov_b32 s1, s3 2271; GFX12-NEXT: s_mov_b32 s2, s4 2272; GFX12-NEXT: s_mov_b32 s3, s5 2273; GFX12-NEXT: s_mov_b32 s4, s6 2274; GFX12-NEXT: s_mov_b32 s5, s7 2275; GFX12-NEXT: s_mov_b32 s6, s8 2276; GFX12-NEXT: s_mov_b32 s7, s9 2277; GFX12-NEXT: image_atomic_sub_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2278; GFX12-NEXT: s_wait_loadcnt 0x0 2279; GFX12-NEXT: ; return to shader part epilog 2280main_body: 2281 %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2282 %out = bitcast i64 %v to <2 x float> 2283 ret <2 x float> %out 2284} 2285 2286define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2287; GFX6-LABEL: atomic_smin_i64_1d: 2288; GFX6: ; %bb.0: ; %main_body 2289; GFX6-NEXT: s_mov_b32 s0, s2 2290; GFX6-NEXT: s_mov_b32 s1, s3 2291; GFX6-NEXT: s_mov_b32 s2, s4 2292; GFX6-NEXT: s_mov_b32 s3, s5 2293; GFX6-NEXT: s_mov_b32 s4, s6 2294; GFX6-NEXT: s_mov_b32 s5, s7 2295; GFX6-NEXT: s_mov_b32 s6, s8 2296; GFX6-NEXT: s_mov_b32 s7, s9 2297; GFX6-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2298; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2299; GFX6-NEXT: ; return to shader part epilog 2300; 2301; GFX8-LABEL: atomic_smin_i64_1d: 2302; GFX8: ; %bb.0: ; %main_body 2303; GFX8-NEXT: s_mov_b32 s0, s2 2304; GFX8-NEXT: s_mov_b32 s1, s3 2305; GFX8-NEXT: 
s_mov_b32 s2, s4 2306; GFX8-NEXT: s_mov_b32 s3, s5 2307; GFX8-NEXT: s_mov_b32 s4, s6 2308; GFX8-NEXT: s_mov_b32 s5, s7 2309; GFX8-NEXT: s_mov_b32 s6, s8 2310; GFX8-NEXT: s_mov_b32 s7, s9 2311; GFX8-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2312; GFX8-NEXT: s_waitcnt vmcnt(0) 2313; GFX8-NEXT: ; return to shader part epilog 2314; 2315; GFX900-LABEL: atomic_smin_i64_1d: 2316; GFX900: ; %bb.0: ; %main_body 2317; GFX900-NEXT: s_mov_b32 s0, s2 2318; GFX900-NEXT: s_mov_b32 s1, s3 2319; GFX900-NEXT: s_mov_b32 s2, s4 2320; GFX900-NEXT: s_mov_b32 s3, s5 2321; GFX900-NEXT: s_mov_b32 s4, s6 2322; GFX900-NEXT: s_mov_b32 s5, s7 2323; GFX900-NEXT: s_mov_b32 s6, s8 2324; GFX900-NEXT: s_mov_b32 s7, s9 2325; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2326; GFX900-NEXT: s_waitcnt vmcnt(0) 2327; GFX900-NEXT: ; return to shader part epilog 2328; 2329; GFX90A-LABEL: atomic_smin_i64_1d: 2330; GFX90A: ; %bb.0: ; %main_body 2331; GFX90A-NEXT: s_mov_b32 s0, s2 2332; GFX90A-NEXT: s_mov_b32 s1, s3 2333; GFX90A-NEXT: s_mov_b32 s2, s4 2334; GFX90A-NEXT: s_mov_b32 s3, s5 2335; GFX90A-NEXT: s_mov_b32 s4, s6 2336; GFX90A-NEXT: s_mov_b32 s5, s7 2337; GFX90A-NEXT: s_mov_b32 s6, s8 2338; GFX90A-NEXT: s_mov_b32 s7, s9 2339; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2340; GFX90A-NEXT: s_waitcnt vmcnt(0) 2341; GFX90A-NEXT: ; return to shader part epilog 2342; 2343; GFX10PLUS-LABEL: atomic_smin_i64_1d: 2344; GFX10PLUS: ; %bb.0: ; %main_body 2345; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2346; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2347; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2348; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2349; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2350; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2351; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2352; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2353; GFX10PLUS-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2354; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2355; GFX10PLUS-NEXT: ; return to shader part 
epilog 2356; 2357; GFX12-LABEL: atomic_smin_i64_1d: 2358; GFX12: ; %bb.0: ; %main_body 2359; GFX12-NEXT: s_mov_b32 s0, s2 2360; GFX12-NEXT: s_mov_b32 s1, s3 2361; GFX12-NEXT: s_mov_b32 s2, s4 2362; GFX12-NEXT: s_mov_b32 s3, s5 2363; GFX12-NEXT: s_mov_b32 s4, s6 2364; GFX12-NEXT: s_mov_b32 s5, s7 2365; GFX12-NEXT: s_mov_b32 s6, s8 2366; GFX12-NEXT: s_mov_b32 s7, s9 2367; GFX12-NEXT: image_atomic_min_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2368; GFX12-NEXT: s_wait_loadcnt 0x0 2369; GFX12-NEXT: ; return to shader part epilog 2370main_body: 2371 %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2372 %out = bitcast i64 %v to <2 x float> 2373 ret <2 x float> %out 2374} 2375 2376define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2377; GFX6-LABEL: atomic_umin_i64_1d: 2378; GFX6: ; %bb.0: ; %main_body 2379; GFX6-NEXT: s_mov_b32 s0, s2 2380; GFX6-NEXT: s_mov_b32 s1, s3 2381; GFX6-NEXT: s_mov_b32 s2, s4 2382; GFX6-NEXT: s_mov_b32 s3, s5 2383; GFX6-NEXT: s_mov_b32 s4, s6 2384; GFX6-NEXT: s_mov_b32 s5, s7 2385; GFX6-NEXT: s_mov_b32 s6, s8 2386; GFX6-NEXT: s_mov_b32 s7, s9 2387; GFX6-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2388; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2389; GFX6-NEXT: ; return to shader part epilog 2390; 2391; GFX8-LABEL: atomic_umin_i64_1d: 2392; GFX8: ; %bb.0: ; %main_body 2393; GFX8-NEXT: s_mov_b32 s0, s2 2394; GFX8-NEXT: s_mov_b32 s1, s3 2395; GFX8-NEXT: s_mov_b32 s2, s4 2396; GFX8-NEXT: s_mov_b32 s3, s5 2397; GFX8-NEXT: s_mov_b32 s4, s6 2398; GFX8-NEXT: s_mov_b32 s5, s7 2399; GFX8-NEXT: s_mov_b32 s6, s8 2400; GFX8-NEXT: s_mov_b32 s7, s9 2401; GFX8-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2402; GFX8-NEXT: s_waitcnt vmcnt(0) 2403; GFX8-NEXT: ; return to shader part epilog 2404; 2405; GFX900-LABEL: atomic_umin_i64_1d: 2406; GFX900: ; %bb.0: ; %main_body 2407; GFX900-NEXT: s_mov_b32 s0, s2 
2408; GFX900-NEXT: s_mov_b32 s1, s3 2409; GFX900-NEXT: s_mov_b32 s2, s4 2410; GFX900-NEXT: s_mov_b32 s3, s5 2411; GFX900-NEXT: s_mov_b32 s4, s6 2412; GFX900-NEXT: s_mov_b32 s5, s7 2413; GFX900-NEXT: s_mov_b32 s6, s8 2414; GFX900-NEXT: s_mov_b32 s7, s9 2415; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2416; GFX900-NEXT: s_waitcnt vmcnt(0) 2417; GFX900-NEXT: ; return to shader part epilog 2418; 2419; GFX90A-LABEL: atomic_umin_i64_1d: 2420; GFX90A: ; %bb.0: ; %main_body 2421; GFX90A-NEXT: s_mov_b32 s0, s2 2422; GFX90A-NEXT: s_mov_b32 s1, s3 2423; GFX90A-NEXT: s_mov_b32 s2, s4 2424; GFX90A-NEXT: s_mov_b32 s3, s5 2425; GFX90A-NEXT: s_mov_b32 s4, s6 2426; GFX90A-NEXT: s_mov_b32 s5, s7 2427; GFX90A-NEXT: s_mov_b32 s6, s8 2428; GFX90A-NEXT: s_mov_b32 s7, s9 2429; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2430; GFX90A-NEXT: s_waitcnt vmcnt(0) 2431; GFX90A-NEXT: ; return to shader part epilog 2432; 2433; GFX10PLUS-LABEL: atomic_umin_i64_1d: 2434; GFX10PLUS: ; %bb.0: ; %main_body 2435; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2436; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2437; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2438; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2439; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2440; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2441; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2442; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2443; GFX10PLUS-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2444; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2445; GFX10PLUS-NEXT: ; return to shader part epilog 2446; 2447; GFX12-LABEL: atomic_umin_i64_1d: 2448; GFX12: ; %bb.0: ; %main_body 2449; GFX12-NEXT: s_mov_b32 s0, s2 2450; GFX12-NEXT: s_mov_b32 s1, s3 2451; GFX12-NEXT: s_mov_b32 s2, s4 2452; GFX12-NEXT: s_mov_b32 s3, s5 2453; GFX12-NEXT: s_mov_b32 s4, s6 2454; GFX12-NEXT: s_mov_b32 s5, s7 2455; GFX12-NEXT: s_mov_b32 s6, s8 2456; GFX12-NEXT: s_mov_b32 s7, s9 2457; GFX12-NEXT: image_atomic_min_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D 
th:TH_ATOMIC_RETURN 2458; GFX12-NEXT: s_wait_loadcnt 0x0 2459; GFX12-NEXT: ; return to shader part epilog 2460main_body: 2461 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2462 %out = bitcast i64 %v to <2 x float> 2463 ret <2 x float> %out 2464} 2465 2466define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2467; GFX6-LABEL: atomic_smax_i64_1d: 2468; GFX6: ; %bb.0: ; %main_body 2469; GFX6-NEXT: s_mov_b32 s0, s2 2470; GFX6-NEXT: s_mov_b32 s1, s3 2471; GFX6-NEXT: s_mov_b32 s2, s4 2472; GFX6-NEXT: s_mov_b32 s3, s5 2473; GFX6-NEXT: s_mov_b32 s4, s6 2474; GFX6-NEXT: s_mov_b32 s5, s7 2475; GFX6-NEXT: s_mov_b32 s6, s8 2476; GFX6-NEXT: s_mov_b32 s7, s9 2477; GFX6-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2478; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2479; GFX6-NEXT: ; return to shader part epilog 2480; 2481; GFX8-LABEL: atomic_smax_i64_1d: 2482; GFX8: ; %bb.0: ; %main_body 2483; GFX8-NEXT: s_mov_b32 s0, s2 2484; GFX8-NEXT: s_mov_b32 s1, s3 2485; GFX8-NEXT: s_mov_b32 s2, s4 2486; GFX8-NEXT: s_mov_b32 s3, s5 2487; GFX8-NEXT: s_mov_b32 s4, s6 2488; GFX8-NEXT: s_mov_b32 s5, s7 2489; GFX8-NEXT: s_mov_b32 s6, s8 2490; GFX8-NEXT: s_mov_b32 s7, s9 2491; GFX8-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2492; GFX8-NEXT: s_waitcnt vmcnt(0) 2493; GFX8-NEXT: ; return to shader part epilog 2494; 2495; GFX900-LABEL: atomic_smax_i64_1d: 2496; GFX900: ; %bb.0: ; %main_body 2497; GFX900-NEXT: s_mov_b32 s0, s2 2498; GFX900-NEXT: s_mov_b32 s1, s3 2499; GFX900-NEXT: s_mov_b32 s2, s4 2500; GFX900-NEXT: s_mov_b32 s3, s5 2501; GFX900-NEXT: s_mov_b32 s4, s6 2502; GFX900-NEXT: s_mov_b32 s5, s7 2503; GFX900-NEXT: s_mov_b32 s6, s8 2504; GFX900-NEXT: s_mov_b32 s7, s9 2505; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2506; GFX900-NEXT: s_waitcnt vmcnt(0) 2507; GFX900-NEXT: ; return to shader part epilog 2508; 2509; GFX90A-LABEL: 
atomic_smax_i64_1d: 2510; GFX90A: ; %bb.0: ; %main_body 2511; GFX90A-NEXT: s_mov_b32 s0, s2 2512; GFX90A-NEXT: s_mov_b32 s1, s3 2513; GFX90A-NEXT: s_mov_b32 s2, s4 2514; GFX90A-NEXT: s_mov_b32 s3, s5 2515; GFX90A-NEXT: s_mov_b32 s4, s6 2516; GFX90A-NEXT: s_mov_b32 s5, s7 2517; GFX90A-NEXT: s_mov_b32 s6, s8 2518; GFX90A-NEXT: s_mov_b32 s7, s9 2519; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2520; GFX90A-NEXT: s_waitcnt vmcnt(0) 2521; GFX90A-NEXT: ; return to shader part epilog 2522; 2523; GFX10PLUS-LABEL: atomic_smax_i64_1d: 2524; GFX10PLUS: ; %bb.0: ; %main_body 2525; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2526; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2527; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2528; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2529; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2530; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2531; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2532; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2533; GFX10PLUS-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2534; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2535; GFX10PLUS-NEXT: ; return to shader part epilog 2536; 2537; GFX12-LABEL: atomic_smax_i64_1d: 2538; GFX12: ; %bb.0: ; %main_body 2539; GFX12-NEXT: s_mov_b32 s0, s2 2540; GFX12-NEXT: s_mov_b32 s1, s3 2541; GFX12-NEXT: s_mov_b32 s2, s4 2542; GFX12-NEXT: s_mov_b32 s3, s5 2543; GFX12-NEXT: s_mov_b32 s4, s6 2544; GFX12-NEXT: s_mov_b32 s5, s7 2545; GFX12-NEXT: s_mov_b32 s6, s8 2546; GFX12-NEXT: s_mov_b32 s7, s9 2547; GFX12-NEXT: image_atomic_max_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2548; GFX12-NEXT: s_wait_loadcnt 0x0 2549; GFX12-NEXT: ; return to shader part epilog 2550main_body: 2551 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2552 %out = bitcast i64 %v to <2 x float> 2553 ret <2 x float> %out 2554} 2555 2556define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2557; GFX6-LABEL: atomic_umax_i64_1d: 2558; 
GFX6: ; %bb.0: ; %main_body 2559; GFX6-NEXT: s_mov_b32 s0, s2 2560; GFX6-NEXT: s_mov_b32 s1, s3 2561; GFX6-NEXT: s_mov_b32 s2, s4 2562; GFX6-NEXT: s_mov_b32 s3, s5 2563; GFX6-NEXT: s_mov_b32 s4, s6 2564; GFX6-NEXT: s_mov_b32 s5, s7 2565; GFX6-NEXT: s_mov_b32 s6, s8 2566; GFX6-NEXT: s_mov_b32 s7, s9 2567; GFX6-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2568; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2569; GFX6-NEXT: ; return to shader part epilog 2570; 2571; GFX8-LABEL: atomic_umax_i64_1d: 2572; GFX8: ; %bb.0: ; %main_body 2573; GFX8-NEXT: s_mov_b32 s0, s2 2574; GFX8-NEXT: s_mov_b32 s1, s3 2575; GFX8-NEXT: s_mov_b32 s2, s4 2576; GFX8-NEXT: s_mov_b32 s3, s5 2577; GFX8-NEXT: s_mov_b32 s4, s6 2578; GFX8-NEXT: s_mov_b32 s5, s7 2579; GFX8-NEXT: s_mov_b32 s6, s8 2580; GFX8-NEXT: s_mov_b32 s7, s9 2581; GFX8-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2582; GFX8-NEXT: s_waitcnt vmcnt(0) 2583; GFX8-NEXT: ; return to shader part epilog 2584; 2585; GFX900-LABEL: atomic_umax_i64_1d: 2586; GFX900: ; %bb.0: ; %main_body 2587; GFX900-NEXT: s_mov_b32 s0, s2 2588; GFX900-NEXT: s_mov_b32 s1, s3 2589; GFX900-NEXT: s_mov_b32 s2, s4 2590; GFX900-NEXT: s_mov_b32 s3, s5 2591; GFX900-NEXT: s_mov_b32 s4, s6 2592; GFX900-NEXT: s_mov_b32 s5, s7 2593; GFX900-NEXT: s_mov_b32 s6, s8 2594; GFX900-NEXT: s_mov_b32 s7, s9 2595; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2596; GFX900-NEXT: s_waitcnt vmcnt(0) 2597; GFX900-NEXT: ; return to shader part epilog 2598; 2599; GFX90A-LABEL: atomic_umax_i64_1d: 2600; GFX90A: ; %bb.0: ; %main_body 2601; GFX90A-NEXT: s_mov_b32 s0, s2 2602; GFX90A-NEXT: s_mov_b32 s1, s3 2603; GFX90A-NEXT: s_mov_b32 s2, s4 2604; GFX90A-NEXT: s_mov_b32 s3, s5 2605; GFX90A-NEXT: s_mov_b32 s4, s6 2606; GFX90A-NEXT: s_mov_b32 s5, s7 2607; GFX90A-NEXT: s_mov_b32 s6, s8 2608; GFX90A-NEXT: s_mov_b32 s7, s9 2609; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2610; GFX90A-NEXT: s_waitcnt vmcnt(0) 2611; 
GFX90A-NEXT: ; return to shader part epilog 2612; 2613; GFX10PLUS-LABEL: atomic_umax_i64_1d: 2614; GFX10PLUS: ; %bb.0: ; %main_body 2615; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2616; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2617; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2618; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2619; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2620; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2621; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2622; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2623; GFX10PLUS-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2624; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2625; GFX10PLUS-NEXT: ; return to shader part epilog 2626; 2627; GFX12-LABEL: atomic_umax_i64_1d: 2628; GFX12: ; %bb.0: ; %main_body 2629; GFX12-NEXT: s_mov_b32 s0, s2 2630; GFX12-NEXT: s_mov_b32 s1, s3 2631; GFX12-NEXT: s_mov_b32 s2, s4 2632; GFX12-NEXT: s_mov_b32 s3, s5 2633; GFX12-NEXT: s_mov_b32 s4, s6 2634; GFX12-NEXT: s_mov_b32 s5, s7 2635; GFX12-NEXT: s_mov_b32 s6, s8 2636; GFX12-NEXT: s_mov_b32 s7, s9 2637; GFX12-NEXT: image_atomic_max_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2638; GFX12-NEXT: s_wait_loadcnt 0x0 2639; GFX12-NEXT: ; return to shader part epilog 2640main_body: 2641 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2642 %out = bitcast i64 %v to <2 x float> 2643 ret <2 x float> %out 2644} 2645 2646define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2647; GFX6-LABEL: atomic_and_i64_1d: 2648; GFX6: ; %bb.0: ; %main_body 2649; GFX6-NEXT: s_mov_b32 s0, s2 2650; GFX6-NEXT: s_mov_b32 s1, s3 2651; GFX6-NEXT: s_mov_b32 s2, s4 2652; GFX6-NEXT: s_mov_b32 s3, s5 2653; GFX6-NEXT: s_mov_b32 s4, s6 2654; GFX6-NEXT: s_mov_b32 s5, s7 2655; GFX6-NEXT: s_mov_b32 s6, s8 2656; GFX6-NEXT: s_mov_b32 s7, s9 2657; GFX6-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2658; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2659; GFX6-NEXT: ; return to shader part 
epilog 2660; 2661; GFX8-LABEL: atomic_and_i64_1d: 2662; GFX8: ; %bb.0: ; %main_body 2663; GFX8-NEXT: s_mov_b32 s0, s2 2664; GFX8-NEXT: s_mov_b32 s1, s3 2665; GFX8-NEXT: s_mov_b32 s2, s4 2666; GFX8-NEXT: s_mov_b32 s3, s5 2667; GFX8-NEXT: s_mov_b32 s4, s6 2668; GFX8-NEXT: s_mov_b32 s5, s7 2669; GFX8-NEXT: s_mov_b32 s6, s8 2670; GFX8-NEXT: s_mov_b32 s7, s9 2671; GFX8-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2672; GFX8-NEXT: s_waitcnt vmcnt(0) 2673; GFX8-NEXT: ; return to shader part epilog 2674; 2675; GFX900-LABEL: atomic_and_i64_1d: 2676; GFX900: ; %bb.0: ; %main_body 2677; GFX900-NEXT: s_mov_b32 s0, s2 2678; GFX900-NEXT: s_mov_b32 s1, s3 2679; GFX900-NEXT: s_mov_b32 s2, s4 2680; GFX900-NEXT: s_mov_b32 s3, s5 2681; GFX900-NEXT: s_mov_b32 s4, s6 2682; GFX900-NEXT: s_mov_b32 s5, s7 2683; GFX900-NEXT: s_mov_b32 s6, s8 2684; GFX900-NEXT: s_mov_b32 s7, s9 2685; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2686; GFX900-NEXT: s_waitcnt vmcnt(0) 2687; GFX900-NEXT: ; return to shader part epilog 2688; 2689; GFX90A-LABEL: atomic_and_i64_1d: 2690; GFX90A: ; %bb.0: ; %main_body 2691; GFX90A-NEXT: s_mov_b32 s0, s2 2692; GFX90A-NEXT: s_mov_b32 s1, s3 2693; GFX90A-NEXT: s_mov_b32 s2, s4 2694; GFX90A-NEXT: s_mov_b32 s3, s5 2695; GFX90A-NEXT: s_mov_b32 s4, s6 2696; GFX90A-NEXT: s_mov_b32 s5, s7 2697; GFX90A-NEXT: s_mov_b32 s6, s8 2698; GFX90A-NEXT: s_mov_b32 s7, s9 2699; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2700; GFX90A-NEXT: s_waitcnt vmcnt(0) 2701; GFX90A-NEXT: ; return to shader part epilog 2702; 2703; GFX10PLUS-LABEL: atomic_and_i64_1d: 2704; GFX10PLUS: ; %bb.0: ; %main_body 2705; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2706; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2707; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2708; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2709; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2710; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2711; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2712; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2713; GFX10PLUS-NEXT: 
image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2714; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2715; GFX10PLUS-NEXT: ; return to shader part epilog 2716; 2717; GFX12-LABEL: atomic_and_i64_1d: 2718; GFX12: ; %bb.0: ; %main_body 2719; GFX12-NEXT: s_mov_b32 s0, s2 2720; GFX12-NEXT: s_mov_b32 s1, s3 2721; GFX12-NEXT: s_mov_b32 s2, s4 2722; GFX12-NEXT: s_mov_b32 s3, s5 2723; GFX12-NEXT: s_mov_b32 s4, s6 2724; GFX12-NEXT: s_mov_b32 s5, s7 2725; GFX12-NEXT: s_mov_b32 s6, s8 2726; GFX12-NEXT: s_mov_b32 s7, s9 2727; GFX12-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2728; GFX12-NEXT: s_wait_loadcnt 0x0 2729; GFX12-NEXT: ; return to shader part epilog 2730main_body: 2731 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2732 %out = bitcast i64 %v to <2 x float> 2733 ret <2 x float> %out 2734} 2735 2736define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2737; GFX6-LABEL: atomic_or_i64_1d: 2738; GFX6: ; %bb.0: ; %main_body 2739; GFX6-NEXT: s_mov_b32 s0, s2 2740; GFX6-NEXT: s_mov_b32 s1, s3 2741; GFX6-NEXT: s_mov_b32 s2, s4 2742; GFX6-NEXT: s_mov_b32 s3, s5 2743; GFX6-NEXT: s_mov_b32 s4, s6 2744; GFX6-NEXT: s_mov_b32 s5, s7 2745; GFX6-NEXT: s_mov_b32 s6, s8 2746; GFX6-NEXT: s_mov_b32 s7, s9 2747; GFX6-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2748; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2749; GFX6-NEXT: ; return to shader part epilog 2750; 2751; GFX8-LABEL: atomic_or_i64_1d: 2752; GFX8: ; %bb.0: ; %main_body 2753; GFX8-NEXT: s_mov_b32 s0, s2 2754; GFX8-NEXT: s_mov_b32 s1, s3 2755; GFX8-NEXT: s_mov_b32 s2, s4 2756; GFX8-NEXT: s_mov_b32 s3, s5 2757; GFX8-NEXT: s_mov_b32 s4, s6 2758; GFX8-NEXT: s_mov_b32 s5, s7 2759; GFX8-NEXT: s_mov_b32 s6, s8 2760; GFX8-NEXT: s_mov_b32 s7, s9 2761; GFX8-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2762; GFX8-NEXT: s_waitcnt vmcnt(0) 2763; GFX8-NEXT: ; 
return to shader part epilog 2764; 2765; GFX900-LABEL: atomic_or_i64_1d: 2766; GFX900: ; %bb.0: ; %main_body 2767; GFX900-NEXT: s_mov_b32 s0, s2 2768; GFX900-NEXT: s_mov_b32 s1, s3 2769; GFX900-NEXT: s_mov_b32 s2, s4 2770; GFX900-NEXT: s_mov_b32 s3, s5 2771; GFX900-NEXT: s_mov_b32 s4, s6 2772; GFX900-NEXT: s_mov_b32 s5, s7 2773; GFX900-NEXT: s_mov_b32 s6, s8 2774; GFX900-NEXT: s_mov_b32 s7, s9 2775; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2776; GFX900-NEXT: s_waitcnt vmcnt(0) 2777; GFX900-NEXT: ; return to shader part epilog 2778; 2779; GFX90A-LABEL: atomic_or_i64_1d: 2780; GFX90A: ; %bb.0: ; %main_body 2781; GFX90A-NEXT: s_mov_b32 s0, s2 2782; GFX90A-NEXT: s_mov_b32 s1, s3 2783; GFX90A-NEXT: s_mov_b32 s2, s4 2784; GFX90A-NEXT: s_mov_b32 s3, s5 2785; GFX90A-NEXT: s_mov_b32 s4, s6 2786; GFX90A-NEXT: s_mov_b32 s5, s7 2787; GFX90A-NEXT: s_mov_b32 s6, s8 2788; GFX90A-NEXT: s_mov_b32 s7, s9 2789; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2790; GFX90A-NEXT: s_waitcnt vmcnt(0) 2791; GFX90A-NEXT: ; return to shader part epilog 2792; 2793; GFX10PLUS-LABEL: atomic_or_i64_1d: 2794; GFX10PLUS: ; %bb.0: ; %main_body 2795; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2796; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2797; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2798; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2799; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2800; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2801; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2802; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2803; GFX10PLUS-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2804; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2805; GFX10PLUS-NEXT: ; return to shader part epilog 2806; 2807; GFX12-LABEL: atomic_or_i64_1d: 2808; GFX12: ; %bb.0: ; %main_body 2809; GFX12-NEXT: s_mov_b32 s0, s2 2810; GFX12-NEXT: s_mov_b32 s1, s3 2811; GFX12-NEXT: s_mov_b32 s2, s4 2812; GFX12-NEXT: s_mov_b32 s3, s5 2813; GFX12-NEXT: s_mov_b32 s4, s6 2814; GFX12-NEXT: s_mov_b32 s5, s7 2815; GFX12-NEXT: s_mov_b32 s6, s8 
2816; GFX12-NEXT: s_mov_b32 s7, s9 2817; GFX12-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2818; GFX12-NEXT: s_wait_loadcnt 0x0 2819; GFX12-NEXT: ; return to shader part epilog 2820main_body: 2821 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2822 %out = bitcast i64 %v to <2 x float> 2823 ret <2 x float> %out 2824} 2825 2826define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2827; GFX6-LABEL: atomic_xor_i64_1d: 2828; GFX6: ; %bb.0: ; %main_body 2829; GFX6-NEXT: s_mov_b32 s0, s2 2830; GFX6-NEXT: s_mov_b32 s1, s3 2831; GFX6-NEXT: s_mov_b32 s2, s4 2832; GFX6-NEXT: s_mov_b32 s3, s5 2833; GFX6-NEXT: s_mov_b32 s4, s6 2834; GFX6-NEXT: s_mov_b32 s5, s7 2835; GFX6-NEXT: s_mov_b32 s6, s8 2836; GFX6-NEXT: s_mov_b32 s7, s9 2837; GFX6-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2838; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2839; GFX6-NEXT: ; return to shader part epilog 2840; 2841; GFX8-LABEL: atomic_xor_i64_1d: 2842; GFX8: ; %bb.0: ; %main_body 2843; GFX8-NEXT: s_mov_b32 s0, s2 2844; GFX8-NEXT: s_mov_b32 s1, s3 2845; GFX8-NEXT: s_mov_b32 s2, s4 2846; GFX8-NEXT: s_mov_b32 s3, s5 2847; GFX8-NEXT: s_mov_b32 s4, s6 2848; GFX8-NEXT: s_mov_b32 s5, s7 2849; GFX8-NEXT: s_mov_b32 s6, s8 2850; GFX8-NEXT: s_mov_b32 s7, s9 2851; GFX8-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2852; GFX8-NEXT: s_waitcnt vmcnt(0) 2853; GFX8-NEXT: ; return to shader part epilog 2854; 2855; GFX900-LABEL: atomic_xor_i64_1d: 2856; GFX900: ; %bb.0: ; %main_body 2857; GFX900-NEXT: s_mov_b32 s0, s2 2858; GFX900-NEXT: s_mov_b32 s1, s3 2859; GFX900-NEXT: s_mov_b32 s2, s4 2860; GFX900-NEXT: s_mov_b32 s3, s5 2861; GFX900-NEXT: s_mov_b32 s4, s6 2862; GFX900-NEXT: s_mov_b32 s5, s7 2863; GFX900-NEXT: s_mov_b32 s6, s8 2864; GFX900-NEXT: s_mov_b32 s7, s9 2865; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2866; 
GFX900-NEXT: s_waitcnt vmcnt(0) 2867; GFX900-NEXT: ; return to shader part epilog 2868; 2869; GFX90A-LABEL: atomic_xor_i64_1d: 2870; GFX90A: ; %bb.0: ; %main_body 2871; GFX90A-NEXT: s_mov_b32 s0, s2 2872; GFX90A-NEXT: s_mov_b32 s1, s3 2873; GFX90A-NEXT: s_mov_b32 s2, s4 2874; GFX90A-NEXT: s_mov_b32 s3, s5 2875; GFX90A-NEXT: s_mov_b32 s4, s6 2876; GFX90A-NEXT: s_mov_b32 s5, s7 2877; GFX90A-NEXT: s_mov_b32 s6, s8 2878; GFX90A-NEXT: s_mov_b32 s7, s9 2879; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2880; GFX90A-NEXT: s_waitcnt vmcnt(0) 2881; GFX90A-NEXT: ; return to shader part epilog 2882; 2883; GFX10PLUS-LABEL: atomic_xor_i64_1d: 2884; GFX10PLUS: ; %bb.0: ; %main_body 2885; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2886; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2887; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2888; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2889; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2890; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2891; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2892; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2893; GFX10PLUS-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2894; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2895; GFX10PLUS-NEXT: ; return to shader part epilog 2896; 2897; GFX12-LABEL: atomic_xor_i64_1d: 2898; GFX12: ; %bb.0: ; %main_body 2899; GFX12-NEXT: s_mov_b32 s0, s2 2900; GFX12-NEXT: s_mov_b32 s1, s3 2901; GFX12-NEXT: s_mov_b32 s2, s4 2902; GFX12-NEXT: s_mov_b32 s3, s5 2903; GFX12-NEXT: s_mov_b32 s4, s6 2904; GFX12-NEXT: s_mov_b32 s5, s7 2905; GFX12-NEXT: s_mov_b32 s6, s8 2906; GFX12-NEXT: s_mov_b32 s7, s9 2907; GFX12-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2908; GFX12-NEXT: s_wait_loadcnt 0x0 2909; GFX12-NEXT: ; return to shader part epilog 2910main_body: 2911 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 2912 %out = bitcast i64 %v to <2 x float> 2913 ret <2 x float> %out 2914} 2915 2916define amdgpu_ps <2 x float> 
@atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 2917; GFX6-LABEL: atomic_inc_i64_1d: 2918; GFX6: ; %bb.0: ; %main_body 2919; GFX6-NEXT: s_mov_b32 s0, s2 2920; GFX6-NEXT: s_mov_b32 s1, s3 2921; GFX6-NEXT: s_mov_b32 s2, s4 2922; GFX6-NEXT: s_mov_b32 s3, s5 2923; GFX6-NEXT: s_mov_b32 s4, s6 2924; GFX6-NEXT: s_mov_b32 s5, s7 2925; GFX6-NEXT: s_mov_b32 s6, s8 2926; GFX6-NEXT: s_mov_b32 s7, s9 2927; GFX6-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2928; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2929; GFX6-NEXT: ; return to shader part epilog 2930; 2931; GFX8-LABEL: atomic_inc_i64_1d: 2932; GFX8: ; %bb.0: ; %main_body 2933; GFX8-NEXT: s_mov_b32 s0, s2 2934; GFX8-NEXT: s_mov_b32 s1, s3 2935; GFX8-NEXT: s_mov_b32 s2, s4 2936; GFX8-NEXT: s_mov_b32 s3, s5 2937; GFX8-NEXT: s_mov_b32 s4, s6 2938; GFX8-NEXT: s_mov_b32 s5, s7 2939; GFX8-NEXT: s_mov_b32 s6, s8 2940; GFX8-NEXT: s_mov_b32 s7, s9 2941; GFX8-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2942; GFX8-NEXT: s_waitcnt vmcnt(0) 2943; GFX8-NEXT: ; return to shader part epilog 2944; 2945; GFX900-LABEL: atomic_inc_i64_1d: 2946; GFX900: ; %bb.0: ; %main_body 2947; GFX900-NEXT: s_mov_b32 s0, s2 2948; GFX900-NEXT: s_mov_b32 s1, s3 2949; GFX900-NEXT: s_mov_b32 s2, s4 2950; GFX900-NEXT: s_mov_b32 s3, s5 2951; GFX900-NEXT: s_mov_b32 s4, s6 2952; GFX900-NEXT: s_mov_b32 s5, s7 2953; GFX900-NEXT: s_mov_b32 s6, s8 2954; GFX900-NEXT: s_mov_b32 s7, s9 2955; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2956; GFX900-NEXT: s_waitcnt vmcnt(0) 2957; GFX900-NEXT: ; return to shader part epilog 2958; 2959; GFX90A-LABEL: atomic_inc_i64_1d: 2960; GFX90A: ; %bb.0: ; %main_body 2961; GFX90A-NEXT: s_mov_b32 s0, s2 2962; GFX90A-NEXT: s_mov_b32 s1, s3 2963; GFX90A-NEXT: s_mov_b32 s2, s4 2964; GFX90A-NEXT: s_mov_b32 s3, s5 2965; GFX90A-NEXT: s_mov_b32 s4, s6 2966; GFX90A-NEXT: s_mov_b32 s5, s7 2967; GFX90A-NEXT: s_mov_b32 s6, s8 2968; GFX90A-NEXT: s_mov_b32 s7, s9 2969; GFX90A-NEXT: 
image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc 2970; GFX90A-NEXT: s_waitcnt vmcnt(0) 2971; GFX90A-NEXT: ; return to shader part epilog 2972; 2973; GFX10PLUS-LABEL: atomic_inc_i64_1d: 2974; GFX10PLUS: ; %bb.0: ; %main_body 2975; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2976; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2977; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2978; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2979; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2980; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2981; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2982; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2983; GFX10PLUS-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 2984; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2985; GFX10PLUS-NEXT: ; return to shader part epilog 2986; 2987; GFX12-LABEL: atomic_inc_i64_1d: 2988; GFX12: ; %bb.0: ; %main_body 2989; GFX12-NEXT: s_mov_b32 s0, s2 2990; GFX12-NEXT: s_mov_b32 s1, s3 2991; GFX12-NEXT: s_mov_b32 s2, s4 2992; GFX12-NEXT: s_mov_b32 s3, s5 2993; GFX12-NEXT: s_mov_b32 s4, s6 2994; GFX12-NEXT: s_mov_b32 s5, s7 2995; GFX12-NEXT: s_mov_b32 s6, s8 2996; GFX12-NEXT: s_mov_b32 s7, s9 2997; GFX12-NEXT: image_atomic_inc_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 2998; GFX12-NEXT: s_wait_loadcnt 0x0 2999; GFX12-NEXT: ; return to shader part epilog 3000main_body: 3001 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 3002 %out = bitcast i64 %v to <2 x float> 3003 ret <2 x float> %out 3004} 3005 3006define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 3007; GFX6-LABEL: atomic_dec_i64_1d: 3008; GFX6: ; %bb.0: ; %main_body 3009; GFX6-NEXT: s_mov_b32 s0, s2 3010; GFX6-NEXT: s_mov_b32 s1, s3 3011; GFX6-NEXT: s_mov_b32 s2, s4 3012; GFX6-NEXT: s_mov_b32 s3, s5 3013; GFX6-NEXT: s_mov_b32 s4, s6 3014; GFX6-NEXT: s_mov_b32 s5, s7 3015; GFX6-NEXT: s_mov_b32 s6, s8 3016; GFX6-NEXT: s_mov_b32 s7, s9 3017; GFX6-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm 
glc 3018; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3019; GFX6-NEXT: ; return to shader part epilog 3020; 3021; GFX8-LABEL: atomic_dec_i64_1d: 3022; GFX8: ; %bb.0: ; %main_body 3023; GFX8-NEXT: s_mov_b32 s0, s2 3024; GFX8-NEXT: s_mov_b32 s1, s3 3025; GFX8-NEXT: s_mov_b32 s2, s4 3026; GFX8-NEXT: s_mov_b32 s3, s5 3027; GFX8-NEXT: s_mov_b32 s4, s6 3028; GFX8-NEXT: s_mov_b32 s5, s7 3029; GFX8-NEXT: s_mov_b32 s6, s8 3030; GFX8-NEXT: s_mov_b32 s7, s9 3031; GFX8-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc 3032; GFX8-NEXT: s_waitcnt vmcnt(0) 3033; GFX8-NEXT: ; return to shader part epilog 3034; 3035; GFX900-LABEL: atomic_dec_i64_1d: 3036; GFX900: ; %bb.0: ; %main_body 3037; GFX900-NEXT: s_mov_b32 s0, s2 3038; GFX900-NEXT: s_mov_b32 s1, s3 3039; GFX900-NEXT: s_mov_b32 s2, s4 3040; GFX900-NEXT: s_mov_b32 s3, s5 3041; GFX900-NEXT: s_mov_b32 s4, s6 3042; GFX900-NEXT: s_mov_b32 s5, s7 3043; GFX900-NEXT: s_mov_b32 s6, s8 3044; GFX900-NEXT: s_mov_b32 s7, s9 3045; GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc 3046; GFX900-NEXT: s_waitcnt vmcnt(0) 3047; GFX900-NEXT: ; return to shader part epilog 3048; 3049; GFX90A-LABEL: atomic_dec_i64_1d: 3050; GFX90A: ; %bb.0: ; %main_body 3051; GFX90A-NEXT: s_mov_b32 s0, s2 3052; GFX90A-NEXT: s_mov_b32 s1, s3 3053; GFX90A-NEXT: s_mov_b32 s2, s4 3054; GFX90A-NEXT: s_mov_b32 s3, s5 3055; GFX90A-NEXT: s_mov_b32 s4, s6 3056; GFX90A-NEXT: s_mov_b32 s5, s7 3057; GFX90A-NEXT: s_mov_b32 s6, s8 3058; GFX90A-NEXT: s_mov_b32 s7, s9 3059; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc 3060; GFX90A-NEXT: s_waitcnt vmcnt(0) 3061; GFX90A-NEXT: ; return to shader part epilog 3062; 3063; GFX10PLUS-LABEL: atomic_dec_i64_1d: 3064; GFX10PLUS: ; %bb.0: ; %main_body 3065; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3066; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3067; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3068; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3069; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3070; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3071; 
GFX10PLUS-NEXT: s_mov_b32 s6, s8 3072; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3073; GFX10PLUS-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc 3074; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3075; GFX10PLUS-NEXT: ; return to shader part epilog 3076; 3077; GFX12-LABEL: atomic_dec_i64_1d: 3078; GFX12: ; %bb.0: ; %main_body 3079; GFX12-NEXT: s_mov_b32 s0, s2 3080; GFX12-NEXT: s_mov_b32 s1, s3 3081; GFX12-NEXT: s_mov_b32 s2, s4 3082; GFX12-NEXT: s_mov_b32 s3, s5 3083; GFX12-NEXT: s_mov_b32 s4, s6 3084; GFX12-NEXT: s_mov_b32 s5, s7 3085; GFX12-NEXT: s_mov_b32 s6, s8 3086; GFX12-NEXT: s_mov_b32 s7, s9 3087; GFX12-NEXT: image_atomic_dec_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 3088; GFX12-NEXT: s_wait_loadcnt 0x0 3089; GFX12-NEXT: ; return to shader part epilog 3090main_body: 3091 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 3092 %out = bitcast i64 %v to <2 x float> 3093 ret <2 x float> %out 3094} 3095 3096define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) { 3097; GFX6-LABEL: atomic_cmpswap_i64_1d: 3098; GFX6: ; %bb.0: ; %main_body 3099; GFX6-NEXT: s_mov_b32 s0, s2 3100; GFX6-NEXT: s_mov_b32 s1, s3 3101; GFX6-NEXT: s_mov_b32 s2, s4 3102; GFX6-NEXT: s_mov_b32 s3, s5 3103; GFX6-NEXT: s_mov_b32 s4, s6 3104; GFX6-NEXT: s_mov_b32 s5, s7 3105; GFX6-NEXT: s_mov_b32 s6, s8 3106; GFX6-NEXT: s_mov_b32 s7, s9 3107; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3108; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3109; GFX6-NEXT: ; return to shader part epilog 3110; 3111; GFX8-LABEL: atomic_cmpswap_i64_1d: 3112; GFX8: ; %bb.0: ; %main_body 3113; GFX8-NEXT: s_mov_b32 s0, s2 3114; GFX8-NEXT: s_mov_b32 s1, s3 3115; GFX8-NEXT: s_mov_b32 s2, s4 3116; GFX8-NEXT: s_mov_b32 s3, s5 3117; GFX8-NEXT: s_mov_b32 s4, s6 3118; GFX8-NEXT: s_mov_b32 s5, s7 3119; GFX8-NEXT: s_mov_b32 s6, s8 3120; GFX8-NEXT: s_mov_b32 s7, 
s9 3121; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3122; GFX8-NEXT: s_waitcnt vmcnt(0) 3123; GFX8-NEXT: ; return to shader part epilog 3124; 3125; GFX900-LABEL: atomic_cmpswap_i64_1d: 3126; GFX900: ; %bb.0: ; %main_body 3127; GFX900-NEXT: s_mov_b32 s0, s2 3128; GFX900-NEXT: s_mov_b32 s1, s3 3129; GFX900-NEXT: s_mov_b32 s2, s4 3130; GFX900-NEXT: s_mov_b32 s3, s5 3131; GFX900-NEXT: s_mov_b32 s4, s6 3132; GFX900-NEXT: s_mov_b32 s5, s7 3133; GFX900-NEXT: s_mov_b32 s6, s8 3134; GFX900-NEXT: s_mov_b32 s7, s9 3135; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3136; GFX900-NEXT: s_waitcnt vmcnt(0) 3137; GFX900-NEXT: ; return to shader part epilog 3138; 3139; GFX90A-LABEL: atomic_cmpswap_i64_1d: 3140; GFX90A: ; %bb.0: ; %main_body 3141; GFX90A-NEXT: s_mov_b32 s0, s2 3142; GFX90A-NEXT: s_mov_b32 s1, s3 3143; GFX90A-NEXT: s_mov_b32 s2, s4 3144; GFX90A-NEXT: s_mov_b32 s3, s5 3145; GFX90A-NEXT: s_mov_b32 s4, s6 3146; GFX90A-NEXT: s_mov_b32 s5, s7 3147; GFX90A-NEXT: s_mov_b32 s6, s8 3148; GFX90A-NEXT: s_mov_b32 s7, s9 3149; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3150; GFX90A-NEXT: s_waitcnt vmcnt(0) 3151; GFX90A-NEXT: ; return to shader part epilog 3152; 3153; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d: 3154; GFX10PLUS: ; %bb.0: ; %main_body 3155; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3156; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3157; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3158; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3159; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3160; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3161; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3162; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3163; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc 3164; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3165; GFX10PLUS-NEXT: ; return to shader part epilog 3166; 3167; GFX12-LABEL: atomic_cmpswap_i64_1d: 3168; GFX12: ; %bb.0: ; %main_body 3169; GFX12-NEXT: s_mov_b32 s0, s2 3170; GFX12-NEXT: s_mov_b32 s1, s3 
3171; GFX12-NEXT: s_mov_b32 s2, s4 3172; GFX12-NEXT: s_mov_b32 s3, s5 3173; GFX12-NEXT: s_mov_b32 s4, s6 3174; GFX12-NEXT: s_mov_b32 s5, s7 3175; GFX12-NEXT: s_mov_b32 s6, s8 3176; GFX12-NEXT: s_mov_b32 s7, s9 3177; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 3178; GFX12-NEXT: s_wait_loadcnt 0x0 3179; GFX12-NEXT: ; return to shader part epilog 3180main_body: 3181 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 3182 %out = bitcast i64 %v to <2 x float> 3183 ret <2 x float> %out 3184} 3185 3186define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) { 3187; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return: 3188; GFX6: ; %bb.0: ; %main_body 3189; GFX6-NEXT: s_mov_b32 s0, s2 3190; GFX6-NEXT: s_mov_b32 s1, s3 3191; GFX6-NEXT: s_mov_b32 s2, s4 3192; GFX6-NEXT: s_mov_b32 s3, s5 3193; GFX6-NEXT: s_mov_b32 s4, s6 3194; GFX6-NEXT: s_mov_b32 s5, s7 3195; GFX6-NEXT: s_mov_b32 s6, s8 3196; GFX6-NEXT: s_mov_b32 s7, s9 3197; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3198; GFX6-NEXT: s_endpgm 3199; 3200; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return: 3201; GFX8: ; %bb.0: ; %main_body 3202; GFX8-NEXT: s_mov_b32 s0, s2 3203; GFX8-NEXT: s_mov_b32 s1, s3 3204; GFX8-NEXT: s_mov_b32 s2, s4 3205; GFX8-NEXT: s_mov_b32 s3, s5 3206; GFX8-NEXT: s_mov_b32 s4, s6 3207; GFX8-NEXT: s_mov_b32 s5, s7 3208; GFX8-NEXT: s_mov_b32 s6, s8 3209; GFX8-NEXT: s_mov_b32 s7, s9 3210; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3211; GFX8-NEXT: s_endpgm 3212; 3213; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return: 3214; GFX900: ; %bb.0: ; %main_body 3215; GFX900-NEXT: s_mov_b32 s0, s2 3216; GFX900-NEXT: s_mov_b32 s1, s3 3217; GFX900-NEXT: s_mov_b32 s2, s4 3218; GFX900-NEXT: s_mov_b32 s3, s5 3219; GFX900-NEXT: s_mov_b32 s4, s6 3220; GFX900-NEXT: s_mov_b32 s5, s7 3221; GFX900-NEXT: s_mov_b32 
s6, s8 3222; GFX900-NEXT: s_mov_b32 s7, s9 3223; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3224; GFX900-NEXT: s_endpgm 3225; 3226; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return: 3227; GFX90A: ; %bb.0: ; %main_body 3228; GFX90A-NEXT: s_mov_b32 s0, s2 3229; GFX90A-NEXT: s_mov_b32 s1, s3 3230; GFX90A-NEXT: s_mov_b32 s2, s4 3231; GFX90A-NEXT: s_mov_b32 s3, s5 3232; GFX90A-NEXT: s_mov_b32 s4, s6 3233; GFX90A-NEXT: s_mov_b32 s5, s7 3234; GFX90A-NEXT: s_mov_b32 s6, s8 3235; GFX90A-NEXT: s_mov_b32 s7, s9 3236; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc 3237; GFX90A-NEXT: s_endpgm 3238; 3239; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return: 3240; GFX10PLUS: ; %bb.0: ; %main_body 3241; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3242; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3243; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3244; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3245; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3246; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3247; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3248; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3249; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc 3250; GFX10PLUS-NEXT: s_endpgm 3251; 3252; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return: 3253; GFX12: ; %bb.0: ; %main_body 3254; GFX12-NEXT: s_mov_b32 s0, s2 3255; GFX12-NEXT: s_mov_b32 s1, s3 3256; GFX12-NEXT: s_mov_b32 s2, s4 3257; GFX12-NEXT: s_mov_b32 s3, s5 3258; GFX12-NEXT: s_mov_b32 s4, s6 3259; GFX12-NEXT: s_mov_b32 s5, s7 3260; GFX12-NEXT: s_mov_b32 s6, s8 3261; GFX12-NEXT: s_mov_b32 s7, s9 3262; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN 3263; GFX12-NEXT: s_endpgm 3264main_body: 3265 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) 3266 ret void 3267} 3268 3269define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) { 3270; GFX6-LABEL: atomic_add_i64_2d: 
3271; GFX6: ; %bb.0: ; %main_body 3272; GFX6-NEXT: s_mov_b32 s0, s2 3273; GFX6-NEXT: s_mov_b32 s1, s3 3274; GFX6-NEXT: s_mov_b32 s2, s4 3275; GFX6-NEXT: s_mov_b32 s3, s5 3276; GFX6-NEXT: s_mov_b32 s4, s6 3277; GFX6-NEXT: s_mov_b32 s5, s7 3278; GFX6-NEXT: s_mov_b32 s6, s8 3279; GFX6-NEXT: s_mov_b32 s7, s9 3280; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc 3281; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3282; GFX6-NEXT: ; return to shader part epilog 3283; 3284; GFX8-LABEL: atomic_add_i64_2d: 3285; GFX8: ; %bb.0: ; %main_body 3286; GFX8-NEXT: s_mov_b32 s0, s2 3287; GFX8-NEXT: s_mov_b32 s1, s3 3288; GFX8-NEXT: s_mov_b32 s2, s4 3289; GFX8-NEXT: s_mov_b32 s3, s5 3290; GFX8-NEXT: s_mov_b32 s4, s6 3291; GFX8-NEXT: s_mov_b32 s5, s7 3292; GFX8-NEXT: s_mov_b32 s6, s8 3293; GFX8-NEXT: s_mov_b32 s7, s9 3294; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc 3295; GFX8-NEXT: s_waitcnt vmcnt(0) 3296; GFX8-NEXT: ; return to shader part epilog 3297; 3298; GFX900-LABEL: atomic_add_i64_2d: 3299; GFX900: ; %bb.0: ; %main_body 3300; GFX900-NEXT: s_mov_b32 s0, s2 3301; GFX900-NEXT: s_mov_b32 s1, s3 3302; GFX900-NEXT: s_mov_b32 s2, s4 3303; GFX900-NEXT: s_mov_b32 s3, s5 3304; GFX900-NEXT: s_mov_b32 s4, s6 3305; GFX900-NEXT: s_mov_b32 s5, s7 3306; GFX900-NEXT: s_mov_b32 s6, s8 3307; GFX900-NEXT: s_mov_b32 s7, s9 3308; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc 3309; GFX900-NEXT: s_waitcnt vmcnt(0) 3310; GFX900-NEXT: ; return to shader part epilog 3311; 3312; GFX90A-LABEL: atomic_add_i64_2d: 3313; GFX90A: ; %bb.0: ; %main_body 3314; GFX90A-NEXT: s_mov_b32 s0, s2 3315; GFX90A-NEXT: s_mov_b32 s1, s3 3316; GFX90A-NEXT: s_mov_b32 s2, s4 3317; GFX90A-NEXT: s_mov_b32 s3, s5 3318; GFX90A-NEXT: s_mov_b32 s4, s6 3319; GFX90A-NEXT: s_mov_b32 s5, s7 3320; GFX90A-NEXT: s_mov_b32 s6, s8 3321; GFX90A-NEXT: s_mov_b32 s7, s9 3322; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc 3323; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 3324; GFX90A-NEXT: ; return to shader part epilog 3325; 3326; GFX10PLUS-LABEL: atomic_add_i64_2d: 3327; GFX10PLUS: ; %bb.0: ; %main_body 3328; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3329; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3330; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3331; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3332; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3333; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3334; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3335; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3336; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc 3337; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3338; GFX10PLUS-NEXT: ; return to shader part epilog 3339; 3340; GFX12-LABEL: atomic_add_i64_2d: 3341; GFX12: ; %bb.0: ; %main_body 3342; GFX12-NEXT: s_mov_b32 s0, s2 3343; GFX12-NEXT: s_mov_b32 s1, s3 3344; GFX12-NEXT: s_mov_b32 s2, s4 3345; GFX12-NEXT: s_mov_b32 s3, s5 3346; GFX12-NEXT: s_mov_b32 s4, s6 3347; GFX12-NEXT: s_mov_b32 s5, s7 3348; GFX12-NEXT: s_mov_b32 s6, s8 3349; GFX12-NEXT: s_mov_b32 s7, s9 3350; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN 3351; GFX12-NEXT: s_wait_loadcnt 0x0 3352; GFX12-NEXT: ; return to shader part epilog 3353main_body: 3354 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) 3355 %out = bitcast i64 %v to <2 x float> 3356 ret <2 x float> %out 3357} 3358 3359define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) { 3360; GFX6-LABEL: atomic_add_i64_3d: 3361; GFX6: ; %bb.0: ; %main_body 3362; GFX6-NEXT: s_mov_b32 s0, s2 3363; GFX6-NEXT: s_mov_b32 s1, s3 3364; GFX6-NEXT: s_mov_b32 s2, s4 3365; GFX6-NEXT: s_mov_b32 s3, s5 3366; GFX6-NEXT: s_mov_b32 s4, s6 3367; GFX6-NEXT: s_mov_b32 s5, s7 3368; GFX6-NEXT: s_mov_b32 s6, s8 3369; GFX6-NEXT: s_mov_b32 s7, s9 3370; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3371; GFX6-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) 3372; GFX6-NEXT: ; return to shader part epilog 3373; 3374; GFX8-LABEL: atomic_add_i64_3d: 3375; GFX8: ; %bb.0: ; %main_body 3376; GFX8-NEXT: s_mov_b32 s0, s2 3377; GFX8-NEXT: s_mov_b32 s1, s3 3378; GFX8-NEXT: s_mov_b32 s2, s4 3379; GFX8-NEXT: s_mov_b32 s3, s5 3380; GFX8-NEXT: s_mov_b32 s4, s6 3381; GFX8-NEXT: s_mov_b32 s5, s7 3382; GFX8-NEXT: s_mov_b32 s6, s8 3383; GFX8-NEXT: s_mov_b32 s7, s9 3384; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3385; GFX8-NEXT: s_waitcnt vmcnt(0) 3386; GFX8-NEXT: ; return to shader part epilog 3387; 3388; GFX900-LABEL: atomic_add_i64_3d: 3389; GFX900: ; %bb.0: ; %main_body 3390; GFX900-NEXT: s_mov_b32 s0, s2 3391; GFX900-NEXT: s_mov_b32 s1, s3 3392; GFX900-NEXT: s_mov_b32 s2, s4 3393; GFX900-NEXT: s_mov_b32 s3, s5 3394; GFX900-NEXT: s_mov_b32 s4, s6 3395; GFX900-NEXT: s_mov_b32 s5, s7 3396; GFX900-NEXT: s_mov_b32 s6, s8 3397; GFX900-NEXT: s_mov_b32 s7, s9 3398; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3399; GFX900-NEXT: s_waitcnt vmcnt(0) 3400; GFX900-NEXT: ; return to shader part epilog 3401; 3402; GFX90A-LABEL: atomic_add_i64_3d: 3403; GFX90A: ; %bb.0: ; %main_body 3404; GFX90A-NEXT: s_mov_b32 s0, s2 3405; GFX90A-NEXT: s_mov_b32 s1, s3 3406; GFX90A-NEXT: s_mov_b32 s2, s4 3407; GFX90A-NEXT: s_mov_b32 s3, s5 3408; GFX90A-NEXT: s_mov_b32 s4, s6 3409; GFX90A-NEXT: s_mov_b32 s5, s7 3410; GFX90A-NEXT: s_mov_b32 s6, s8 3411; GFX90A-NEXT: s_mov_b32 s7, s9 3412; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3413; GFX90A-NEXT: s_waitcnt vmcnt(0) 3414; GFX90A-NEXT: ; return to shader part epilog 3415; 3416; GFX10PLUS-LABEL: atomic_add_i64_3d: 3417; GFX10PLUS: ; %bb.0: ; %main_body 3418; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3419; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3420; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3421; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3422; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3423; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3424; GFX10PLUS-NEXT: s_mov_b32 s6, s8 
3425; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3426; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc 3427; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3428; GFX10PLUS-NEXT: ; return to shader part epilog 3429; 3430; GFX12-LABEL: atomic_add_i64_3d: 3431; GFX12: ; %bb.0: ; %main_body 3432; GFX12-NEXT: s_mov_b32 s0, s2 3433; GFX12-NEXT: s_mov_b32 s1, s3 3434; GFX12-NEXT: s_mov_b32 s2, s4 3435; GFX12-NEXT: s_mov_b32 s3, s5 3436; GFX12-NEXT: s_mov_b32 s4, s6 3437; GFX12-NEXT: s_mov_b32 s5, s7 3438; GFX12-NEXT: s_mov_b32 s6, s8 3439; GFX12-NEXT: s_mov_b32 s7, s9 3440; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN 3441; GFX12-NEXT: s_wait_loadcnt 0x0 3442; GFX12-NEXT: ; return to shader part epilog 3443main_body: 3444 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) 3445 %out = bitcast i64 %v to <2 x float> 3446 ret <2 x float> %out 3447} 3448 3449define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) { 3450; GFX6-LABEL: atomic_add_i64_cube: 3451; GFX6: ; %bb.0: ; %main_body 3452; GFX6-NEXT: s_mov_b32 s0, s2 3453; GFX6-NEXT: s_mov_b32 s1, s3 3454; GFX6-NEXT: s_mov_b32 s2, s4 3455; GFX6-NEXT: s_mov_b32 s3, s5 3456; GFX6-NEXT: s_mov_b32 s4, s6 3457; GFX6-NEXT: s_mov_b32 s5, s7 3458; GFX6-NEXT: s_mov_b32 s6, s8 3459; GFX6-NEXT: s_mov_b32 s7, s9 3460; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3461; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3462; GFX6-NEXT: ; return to shader part epilog 3463; 3464; GFX8-LABEL: atomic_add_i64_cube: 3465; GFX8: ; %bb.0: ; %main_body 3466; GFX8-NEXT: s_mov_b32 s0, s2 3467; GFX8-NEXT: s_mov_b32 s1, s3 3468; GFX8-NEXT: s_mov_b32 s2, s4 3469; GFX8-NEXT: s_mov_b32 s3, s5 3470; GFX8-NEXT: s_mov_b32 s4, s6 3471; GFX8-NEXT: s_mov_b32 s5, s7 3472; GFX8-NEXT: s_mov_b32 s6, s8 3473; GFX8-NEXT: s_mov_b32 
s7, s9 3474; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3475; GFX8-NEXT: s_waitcnt vmcnt(0) 3476; GFX8-NEXT: ; return to shader part epilog 3477; 3478; GFX900-LABEL: atomic_add_i64_cube: 3479; GFX900: ; %bb.0: ; %main_body 3480; GFX900-NEXT: s_mov_b32 s0, s2 3481; GFX900-NEXT: s_mov_b32 s1, s3 3482; GFX900-NEXT: s_mov_b32 s2, s4 3483; GFX900-NEXT: s_mov_b32 s3, s5 3484; GFX900-NEXT: s_mov_b32 s4, s6 3485; GFX900-NEXT: s_mov_b32 s5, s7 3486; GFX900-NEXT: s_mov_b32 s6, s8 3487; GFX900-NEXT: s_mov_b32 s7, s9 3488; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3489; GFX900-NEXT: s_waitcnt vmcnt(0) 3490; GFX900-NEXT: ; return to shader part epilog 3491; 3492; GFX90A-LABEL: atomic_add_i64_cube: 3493; GFX90A: ; %bb.0: ; %main_body 3494; GFX90A-NEXT: s_mov_b32 s0, s2 3495; GFX90A-NEXT: s_mov_b32 s1, s3 3496; GFX90A-NEXT: s_mov_b32 s2, s4 3497; GFX90A-NEXT: s_mov_b32 s3, s5 3498; GFX90A-NEXT: s_mov_b32 s4, s6 3499; GFX90A-NEXT: s_mov_b32 s5, s7 3500; GFX90A-NEXT: s_mov_b32 s6, s8 3501; GFX90A-NEXT: s_mov_b32 s7, s9 3502; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3503; GFX90A-NEXT: s_waitcnt vmcnt(0) 3504; GFX90A-NEXT: ; return to shader part epilog 3505; 3506; GFX10PLUS-LABEL: atomic_add_i64_cube: 3507; GFX10PLUS: ; %bb.0: ; %main_body 3508; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3509; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3510; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3511; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3512; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3513; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3514; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3515; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3516; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc 3517; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3518; GFX10PLUS-NEXT: ; return to shader part epilog 3519; 3520; GFX12-LABEL: atomic_add_i64_cube: 3521; GFX12: ; %bb.0: ; %main_body 3522; GFX12-NEXT: s_mov_b32 s0, s2 3523; GFX12-NEXT: s_mov_b32 s1, 
s3 3524; GFX12-NEXT: s_mov_b32 s2, s4 3525; GFX12-NEXT: s_mov_b32 s3, s5 3526; GFX12-NEXT: s_mov_b32 s4, s6 3527; GFX12-NEXT: s_mov_b32 s5, s7 3528; GFX12-NEXT: s_mov_b32 s6, s8 3529; GFX12-NEXT: s_mov_b32 s7, s9 3530; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN 3531; GFX12-NEXT: s_wait_loadcnt 0x0 3532; GFX12-NEXT: ; return to shader part epilog 3533main_body: 3534 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0) 3535 %out = bitcast i64 %v to <2 x float> 3536 ret <2 x float> %out 3537} 3538 3539define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) { 3540; GFX6-LABEL: atomic_add_i64_1darray: 3541; GFX6: ; %bb.0: ; %main_body 3542; GFX6-NEXT: s_mov_b32 s0, s2 3543; GFX6-NEXT: s_mov_b32 s1, s3 3544; GFX6-NEXT: s_mov_b32 s2, s4 3545; GFX6-NEXT: s_mov_b32 s3, s5 3546; GFX6-NEXT: s_mov_b32 s4, s6 3547; GFX6-NEXT: s_mov_b32 s5, s7 3548; GFX6-NEXT: s_mov_b32 s6, s8 3549; GFX6-NEXT: s_mov_b32 s7, s9 3550; GFX6-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da 3551; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3552; GFX6-NEXT: ; return to shader part epilog 3553; 3554; GFX8-LABEL: atomic_add_i64_1darray: 3555; GFX8: ; %bb.0: ; %main_body 3556; GFX8-NEXT: s_mov_b32 s0, s2 3557; GFX8-NEXT: s_mov_b32 s1, s3 3558; GFX8-NEXT: s_mov_b32 s2, s4 3559; GFX8-NEXT: s_mov_b32 s3, s5 3560; GFX8-NEXT: s_mov_b32 s4, s6 3561; GFX8-NEXT: s_mov_b32 s5, s7 3562; GFX8-NEXT: s_mov_b32 s6, s8 3563; GFX8-NEXT: s_mov_b32 s7, s9 3564; GFX8-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da 3565; GFX8-NEXT: s_waitcnt vmcnt(0) 3566; GFX8-NEXT: ; return to shader part epilog 3567; 3568; GFX900-LABEL: atomic_add_i64_1darray: 3569; GFX900: ; %bb.0: ; %main_body 3570; GFX900-NEXT: s_mov_b32 s0, s2 3571; GFX900-NEXT: s_mov_b32 s1, s3 3572; GFX900-NEXT: s_mov_b32 s2, s4 3573; 
GFX900-NEXT: s_mov_b32 s3, s5 3574; GFX900-NEXT: s_mov_b32 s4, s6 3575; GFX900-NEXT: s_mov_b32 s5, s7 3576; GFX900-NEXT: s_mov_b32 s6, s8 3577; GFX900-NEXT: s_mov_b32 s7, s9 3578; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da 3579; GFX900-NEXT: s_waitcnt vmcnt(0) 3580; GFX900-NEXT: ; return to shader part epilog 3581; 3582; GFX90A-LABEL: atomic_add_i64_1darray: 3583; GFX90A: ; %bb.0: ; %main_body 3584; GFX90A-NEXT: s_mov_b32 s0, s2 3585; GFX90A-NEXT: s_mov_b32 s1, s3 3586; GFX90A-NEXT: s_mov_b32 s2, s4 3587; GFX90A-NEXT: s_mov_b32 s3, s5 3588; GFX90A-NEXT: s_mov_b32 s4, s6 3589; GFX90A-NEXT: s_mov_b32 s5, s7 3590; GFX90A-NEXT: s_mov_b32 s6, s8 3591; GFX90A-NEXT: s_mov_b32 s7, s9 3592; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da 3593; GFX90A-NEXT: s_waitcnt vmcnt(0) 3594; GFX90A-NEXT: ; return to shader part epilog 3595; 3596; GFX10PLUS-LABEL: atomic_add_i64_1darray: 3597; GFX10PLUS: ; %bb.0: ; %main_body 3598; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3599; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3600; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3601; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3602; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3603; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3604; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3605; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3606; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc 3607; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3608; GFX10PLUS-NEXT: ; return to shader part epilog 3609; 3610; GFX12-LABEL: atomic_add_i64_1darray: 3611; GFX12: ; %bb.0: ; %main_body 3612; GFX12-NEXT: s_mov_b32 s0, s2 3613; GFX12-NEXT: s_mov_b32 s1, s3 3614; GFX12-NEXT: s_mov_b32 s2, s4 3615; GFX12-NEXT: s_mov_b32 s3, s5 3616; GFX12-NEXT: s_mov_b32 s4, s6 3617; GFX12-NEXT: s_mov_b32 s5, s7 3618; GFX12-NEXT: s_mov_b32 s6, s8 3619; GFX12-NEXT: s_mov_b32 s7, s9 3620; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN 3621; GFX12-NEXT: 
s_wait_loadcnt 0x0 3622; GFX12-NEXT: ; return to shader part epilog 3623main_body: 3624 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) 3625 %out = bitcast i64 %v to <2 x float> 3626 ret <2 x float> %out 3627} 3628 3629define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) { 3630; GFX6-LABEL: atomic_add_i64_2darray: 3631; GFX6: ; %bb.0: ; %main_body 3632; GFX6-NEXT: s_mov_b32 s0, s2 3633; GFX6-NEXT: s_mov_b32 s1, s3 3634; GFX6-NEXT: s_mov_b32 s2, s4 3635; GFX6-NEXT: s_mov_b32 s3, s5 3636; GFX6-NEXT: s_mov_b32 s4, s6 3637; GFX6-NEXT: s_mov_b32 s5, s7 3638; GFX6-NEXT: s_mov_b32 s6, s8 3639; GFX6-NEXT: s_mov_b32 s7, s9 3640; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3641; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3642; GFX6-NEXT: ; return to shader part epilog 3643; 3644; GFX8-LABEL: atomic_add_i64_2darray: 3645; GFX8: ; %bb.0: ; %main_body 3646; GFX8-NEXT: s_mov_b32 s0, s2 3647; GFX8-NEXT: s_mov_b32 s1, s3 3648; GFX8-NEXT: s_mov_b32 s2, s4 3649; GFX8-NEXT: s_mov_b32 s3, s5 3650; GFX8-NEXT: s_mov_b32 s4, s6 3651; GFX8-NEXT: s_mov_b32 s5, s7 3652; GFX8-NEXT: s_mov_b32 s6, s8 3653; GFX8-NEXT: s_mov_b32 s7, s9 3654; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3655; GFX8-NEXT: s_waitcnt vmcnt(0) 3656; GFX8-NEXT: ; return to shader part epilog 3657; 3658; GFX900-LABEL: atomic_add_i64_2darray: 3659; GFX900: ; %bb.0: ; %main_body 3660; GFX900-NEXT: s_mov_b32 s0, s2 3661; GFX900-NEXT: s_mov_b32 s1, s3 3662; GFX900-NEXT: s_mov_b32 s2, s4 3663; GFX900-NEXT: s_mov_b32 s3, s5 3664; GFX900-NEXT: s_mov_b32 s4, s6 3665; GFX900-NEXT: s_mov_b32 s5, s7 3666; GFX900-NEXT: s_mov_b32 s6, s8 3667; GFX900-NEXT: s_mov_b32 s7, s9 3668; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3669; GFX900-NEXT: s_waitcnt vmcnt(0) 3670; GFX900-NEXT: ; return to shader part epilog 3671; 
3672; GFX90A-LABEL: atomic_add_i64_2darray: 3673; GFX90A: ; %bb.0: ; %main_body 3674; GFX90A-NEXT: s_mov_b32 s0, s2 3675; GFX90A-NEXT: s_mov_b32 s1, s3 3676; GFX90A-NEXT: s_mov_b32 s2, s4 3677; GFX90A-NEXT: s_mov_b32 s3, s5 3678; GFX90A-NEXT: s_mov_b32 s4, s6 3679; GFX90A-NEXT: s_mov_b32 s5, s7 3680; GFX90A-NEXT: s_mov_b32 s6, s8 3681; GFX90A-NEXT: s_mov_b32 s7, s9 3682; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da 3683; GFX90A-NEXT: s_waitcnt vmcnt(0) 3684; GFX90A-NEXT: ; return to shader part epilog 3685; 3686; GFX10PLUS-LABEL: atomic_add_i64_2darray: 3687; GFX10PLUS: ; %bb.0: ; %main_body 3688; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3689; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3690; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3691; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3692; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3693; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3694; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3695; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3696; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc 3697; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3698; GFX10PLUS-NEXT: ; return to shader part epilog 3699; 3700; GFX12-LABEL: atomic_add_i64_2darray: 3701; GFX12: ; %bb.0: ; %main_body 3702; GFX12-NEXT: s_mov_b32 s0, s2 3703; GFX12-NEXT: s_mov_b32 s1, s3 3704; GFX12-NEXT: s_mov_b32 s2, s4 3705; GFX12-NEXT: s_mov_b32 s3, s5 3706; GFX12-NEXT: s_mov_b32 s4, s6 3707; GFX12-NEXT: s_mov_b32 s5, s7 3708; GFX12-NEXT: s_mov_b32 s6, s8 3709; GFX12-NEXT: s_mov_b32 s7, s9 3710; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN 3711; GFX12-NEXT: s_wait_loadcnt 0x0 3712; GFX12-NEXT: ; return to shader part epilog 3713main_body: 3714 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) 3715 %out = bitcast i64 %v to <2 x float> 3716 ret <2 x float> %out 3717} 3718 3719define amdgpu_ps <2 x float> 
@atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) { 3720; GFX6-LABEL: atomic_add_i64_2dmsaa: 3721; GFX6: ; %bb.0: ; %main_body 3722; GFX6-NEXT: s_mov_b32 s0, s2 3723; GFX6-NEXT: s_mov_b32 s1, s3 3724; GFX6-NEXT: s_mov_b32 s2, s4 3725; GFX6-NEXT: s_mov_b32 s3, s5 3726; GFX6-NEXT: s_mov_b32 s4, s6 3727; GFX6-NEXT: s_mov_b32 s5, s7 3728; GFX6-NEXT: s_mov_b32 s6, s8 3729; GFX6-NEXT: s_mov_b32 s7, s9 3730; GFX6-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3731; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3732; GFX6-NEXT: ; return to shader part epilog 3733; 3734; GFX8-LABEL: atomic_add_i64_2dmsaa: 3735; GFX8: ; %bb.0: ; %main_body 3736; GFX8-NEXT: s_mov_b32 s0, s2 3737; GFX8-NEXT: s_mov_b32 s1, s3 3738; GFX8-NEXT: s_mov_b32 s2, s4 3739; GFX8-NEXT: s_mov_b32 s3, s5 3740; GFX8-NEXT: s_mov_b32 s4, s6 3741; GFX8-NEXT: s_mov_b32 s5, s7 3742; GFX8-NEXT: s_mov_b32 s6, s8 3743; GFX8-NEXT: s_mov_b32 s7, s9 3744; GFX8-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3745; GFX8-NEXT: s_waitcnt vmcnt(0) 3746; GFX8-NEXT: ; return to shader part epilog 3747; 3748; GFX900-LABEL: atomic_add_i64_2dmsaa: 3749; GFX900: ; %bb.0: ; %main_body 3750; GFX900-NEXT: s_mov_b32 s0, s2 3751; GFX900-NEXT: s_mov_b32 s1, s3 3752; GFX900-NEXT: s_mov_b32 s2, s4 3753; GFX900-NEXT: s_mov_b32 s3, s5 3754; GFX900-NEXT: s_mov_b32 s4, s6 3755; GFX900-NEXT: s_mov_b32 s5, s7 3756; GFX900-NEXT: s_mov_b32 s6, s8 3757; GFX900-NEXT: s_mov_b32 s7, s9 3758; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3759; GFX900-NEXT: s_waitcnt vmcnt(0) 3760; GFX900-NEXT: ; return to shader part epilog 3761; 3762; GFX90A-LABEL: atomic_add_i64_2dmsaa: 3763; GFX90A: ; %bb.0: ; %main_body 3764; GFX90A-NEXT: s_mov_b32 s0, s2 3765; GFX90A-NEXT: s_mov_b32 s1, s3 3766; GFX90A-NEXT: s_mov_b32 s2, s4 3767; GFX90A-NEXT: s_mov_b32 s3, s5 3768; GFX90A-NEXT: s_mov_b32 s4, s6 3769; GFX90A-NEXT: s_mov_b32 s5, s7 3770; GFX90A-NEXT: s_mov_b32 s6, s8 
3771; GFX90A-NEXT: s_mov_b32 s7, s9 3772; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc 3773; GFX90A-NEXT: s_waitcnt vmcnt(0) 3774; GFX90A-NEXT: ; return to shader part epilog 3775; 3776; GFX10PLUS-LABEL: atomic_add_i64_2dmsaa: 3777; GFX10PLUS: ; %bb.0: ; %main_body 3778; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3779; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3780; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3781; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3782; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3783; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3784; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3785; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3786; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc 3787; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3788; GFX10PLUS-NEXT: ; return to shader part epilog 3789; 3790; GFX12-LABEL: atomic_add_i64_2dmsaa: 3791; GFX12: ; %bb.0: ; %main_body 3792; GFX12-NEXT: s_mov_b32 s0, s2 3793; GFX12-NEXT: s_mov_b32 s1, s3 3794; GFX12-NEXT: s_mov_b32 s2, s4 3795; GFX12-NEXT: s_mov_b32 s3, s5 3796; GFX12-NEXT: s_mov_b32 s4, s6 3797; GFX12-NEXT: s_mov_b32 s5, s7 3798; GFX12-NEXT: s_mov_b32 s6, s8 3799; GFX12-NEXT: s_mov_b32 s7, s9 3800; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN 3801; GFX12-NEXT: s_wait_loadcnt 0x0 3802; GFX12-NEXT: ; return to shader part epilog 3803main_body: 3804 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 3805 %out = bitcast i64 %v to <2 x float> 3806 ret <2 x float> %out 3807} 3808 3809define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { 3810; GFX6-LABEL: atomic_add_i64_2darraymsaa: 3811; GFX6: ; %bb.0: ; %main_body 3812; GFX6-NEXT: s_mov_b32 s0, s2 3813; GFX6-NEXT: s_mov_b32 s1, s3 3814; GFX6-NEXT: s_mov_b32 s2, s4 3815; GFX6-NEXT: s_mov_b32 s3, s5 3816; GFX6-NEXT: s_mov_b32 s4, s6 
3817; GFX6-NEXT: s_mov_b32 s5, s7 3818; GFX6-NEXT: s_mov_b32 s6, s8 3819; GFX6-NEXT: s_mov_b32 s7, s9 3820; GFX6-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da 3821; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3822; GFX6-NEXT: ; return to shader part epilog 3823; 3824; GFX8-LABEL: atomic_add_i64_2darraymsaa: 3825; GFX8: ; %bb.0: ; %main_body 3826; GFX8-NEXT: s_mov_b32 s0, s2 3827; GFX8-NEXT: s_mov_b32 s1, s3 3828; GFX8-NEXT: s_mov_b32 s2, s4 3829; GFX8-NEXT: s_mov_b32 s3, s5 3830; GFX8-NEXT: s_mov_b32 s4, s6 3831; GFX8-NEXT: s_mov_b32 s5, s7 3832; GFX8-NEXT: s_mov_b32 s6, s8 3833; GFX8-NEXT: s_mov_b32 s7, s9 3834; GFX8-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da 3835; GFX8-NEXT: s_waitcnt vmcnt(0) 3836; GFX8-NEXT: ; return to shader part epilog 3837; 3838; GFX900-LABEL: atomic_add_i64_2darraymsaa: 3839; GFX900: ; %bb.0: ; %main_body 3840; GFX900-NEXT: s_mov_b32 s0, s2 3841; GFX900-NEXT: s_mov_b32 s1, s3 3842; GFX900-NEXT: s_mov_b32 s2, s4 3843; GFX900-NEXT: s_mov_b32 s3, s5 3844; GFX900-NEXT: s_mov_b32 s4, s6 3845; GFX900-NEXT: s_mov_b32 s5, s7 3846; GFX900-NEXT: s_mov_b32 s6, s8 3847; GFX900-NEXT: s_mov_b32 s7, s9 3848; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da 3849; GFX900-NEXT: s_waitcnt vmcnt(0) 3850; GFX900-NEXT: ; return to shader part epilog 3851; 3852; GFX90A-LABEL: atomic_add_i64_2darraymsaa: 3853; GFX90A: ; %bb.0: ; %main_body 3854; GFX90A-NEXT: s_mov_b32 s0, s2 3855; GFX90A-NEXT: s_mov_b32 s1, s3 3856; GFX90A-NEXT: s_mov_b32 s2, s4 3857; GFX90A-NEXT: s_mov_b32 s3, s5 3858; GFX90A-NEXT: s_mov_b32 s4, s6 3859; GFX90A-NEXT: s_mov_b32 s5, s7 3860; GFX90A-NEXT: s_mov_b32 s6, s8 3861; GFX90A-NEXT: s_mov_b32 s7, s9 3862; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da 3863; GFX90A-NEXT: s_waitcnt vmcnt(0) 3864; GFX90A-NEXT: ; return to shader part epilog 3865; 3866; GFX10PLUS-LABEL: atomic_add_i64_2darraymsaa: 3867; GFX10PLUS: ; %bb.0: ; %main_body 3868; 
GFX10PLUS-NEXT: s_mov_b32 s0, s2 3869; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3870; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3871; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3872; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3873; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3874; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3875; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3876; GFX10PLUS-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc 3877; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3878; GFX10PLUS-NEXT: ; return to shader part epilog 3879; 3880; GFX12-LABEL: atomic_add_i64_2darraymsaa: 3881; GFX12: ; %bb.0: ; %main_body 3882; GFX12-NEXT: s_mov_b32 s0, s2 3883; GFX12-NEXT: s_mov_b32 s1, s3 3884; GFX12-NEXT: s_mov_b32 s2, s4 3885; GFX12-NEXT: s_mov_b32 s3, s5 3886; GFX12-NEXT: s_mov_b32 s4, s6 3887; GFX12-NEXT: s_mov_b32 s5, s7 3888; GFX12-NEXT: s_mov_b32 s6, s8 3889; GFX12-NEXT: s_mov_b32 s7, s9 3890; GFX12-NEXT: image_atomic_add_uint v[0:1], [v2, v3, v4, v5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN 3891; GFX12-NEXT: s_wait_loadcnt 0x0 3892; GFX12-NEXT: ; return to shader part epilog 3893main_body: 3894 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 3895 %out = bitcast i64 %v to <2 x float> 3896 ret <2 x float> %out 3897} 3898 3899define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { 3900; GFX6-LABEL: atomic_add_i64_1d_slc: 3901; GFX6: ; %bb.0: ; %main_body 3902; GFX6-NEXT: s_mov_b32 s0, s2 3903; GFX6-NEXT: s_mov_b32 s1, s3 3904; GFX6-NEXT: s_mov_b32 s2, s4 3905; GFX6-NEXT: s_mov_b32 s3, s5 3906; GFX6-NEXT: s_mov_b32 s4, s6 3907; GFX6-NEXT: s_mov_b32 s5, s7 3908; GFX6-NEXT: s_mov_b32 s6, s8 3909; GFX6-NEXT: s_mov_b32 s7, s9 3910; GFX6-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc 3911; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 3912; GFX6-NEXT: ; return to shader part epilog 3913; 3914; GFX8-LABEL: 
atomic_add_i64_1d_slc: 3915; GFX8: ; %bb.0: ; %main_body 3916; GFX8-NEXT: s_mov_b32 s0, s2 3917; GFX8-NEXT: s_mov_b32 s1, s3 3918; GFX8-NEXT: s_mov_b32 s2, s4 3919; GFX8-NEXT: s_mov_b32 s3, s5 3920; GFX8-NEXT: s_mov_b32 s4, s6 3921; GFX8-NEXT: s_mov_b32 s5, s7 3922; GFX8-NEXT: s_mov_b32 s6, s8 3923; GFX8-NEXT: s_mov_b32 s7, s9 3924; GFX8-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc 3925; GFX8-NEXT: s_waitcnt vmcnt(0) 3926; GFX8-NEXT: ; return to shader part epilog 3927; 3928; GFX900-LABEL: atomic_add_i64_1d_slc: 3929; GFX900: ; %bb.0: ; %main_body 3930; GFX900-NEXT: s_mov_b32 s0, s2 3931; GFX900-NEXT: s_mov_b32 s1, s3 3932; GFX900-NEXT: s_mov_b32 s2, s4 3933; GFX900-NEXT: s_mov_b32 s3, s5 3934; GFX900-NEXT: s_mov_b32 s4, s6 3935; GFX900-NEXT: s_mov_b32 s5, s7 3936; GFX900-NEXT: s_mov_b32 s6, s8 3937; GFX900-NEXT: s_mov_b32 s7, s9 3938; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc 3939; GFX900-NEXT: s_waitcnt vmcnt(0) 3940; GFX900-NEXT: ; return to shader part epilog 3941; 3942; GFX90A-LABEL: atomic_add_i64_1d_slc: 3943; GFX90A: ; %bb.0: ; %main_body 3944; GFX90A-NEXT: s_mov_b32 s0, s2 3945; GFX90A-NEXT: s_mov_b32 s1, s3 3946; GFX90A-NEXT: s_mov_b32 s2, s4 3947; GFX90A-NEXT: s_mov_b32 s3, s5 3948; GFX90A-NEXT: s_mov_b32 s4, s6 3949; GFX90A-NEXT: s_mov_b32 s5, s7 3950; GFX90A-NEXT: s_mov_b32 s6, s8 3951; GFX90A-NEXT: s_mov_b32 s7, s9 3952; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc 3953; GFX90A-NEXT: s_waitcnt vmcnt(0) 3954; GFX90A-NEXT: ; return to shader part epilog 3955; 3956; GFX10PLUS-LABEL: atomic_add_i64_1d_slc: 3957; GFX10PLUS: ; %bb.0: ; %main_body 3958; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3959; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3960; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3961; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3962; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3963; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3964; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3965; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3966; GFX10PLUS-NEXT: 
image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc 3967; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 3968; GFX10PLUS-NEXT: ; return to shader part epilog 3969; 3970; GFX12-LABEL: atomic_add_i64_1d_slc: 3971; GFX12: ; %bb.0: ; %main_body 3972; GFX12-NEXT: s_mov_b32 s0, s2 3973; GFX12-NEXT: s_mov_b32 s1, s3 3974; GFX12-NEXT: s_mov_b32 s2, s4 3975; GFX12-NEXT: s_mov_b32 s3, s5 3976; GFX12-NEXT: s_mov_b32 s4, s6 3977; GFX12-NEXT: s_mov_b32 s5, s7 3978; GFX12-NEXT: s_mov_b32 s6, s8 3979; GFX12-NEXT: s_mov_b32 s7, s9 3980; GFX12-NEXT: image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN 3981; GFX12-NEXT: s_wait_loadcnt 0x0 3982; GFX12-NEXT: ; return to shader part epilog 3983main_body: 3984 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) 3985 %out = bitcast i64 %v to <2 x float> 3986 ret <2 x float> %out 3987} 3988 3989declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3990declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3991declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3992declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3993declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3994declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3995declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3996declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3997declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 3998declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 
; Declarations for the image-atomic intrinsics continue below: the remaining
; i32-returning forms first, then the i64-returning forms, and finally the
; attribute group that every declaration references (nounwind).
; In each signature the <8 x i32> operand (passed as %rsrc by the tests above)
; is followed by two i32 operands marked immarg. In the tests visible in this
; part of the file those two operands are "i32 0, i32 0", except in
; atomic_add_i64_1d_slc, which passes "i32 0, i32 2"; its CHECK lines expect
; "slc" (GFX6 through GFX10PLUS) and "th:TH_ATOMIC_NT_RETURN" (GFX12).
3999declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4000declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4001declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4002declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4003declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4004declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4005declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4006declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4007declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4008declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4009 4010declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4011declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4012declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4013declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4014declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4015declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4016declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4017declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 
4018declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4019declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4020declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4021declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4022declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4023declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4024declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4025declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4026declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4027declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4028declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4029declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 4030 4031attributes #0 = { nounwind } 4032