1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6 3; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7 4; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8 5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9 6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10 7; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11 8; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12 9 10define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) { 11; GFX67-LABEL: s_buffer_load_imm: 12; GFX67: ; %bb.0: ; %main_body 13; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1 14; GFX67-NEXT: s_waitcnt lgkmcnt(0) 15; GFX67-NEXT: v_mov_b32_e32 v0, s0 16; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm 17; GFX67-NEXT: s_endpgm 18; 19; GFX8910-LABEL: s_buffer_load_imm: 20; GFX8910: ; %bb.0: ; %main_body 21; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x4 22; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 23; GFX8910-NEXT: v_mov_b32_e32 v0, s0 24; GFX8910-NEXT: exp mrt0 v0, v0, v0, v0 done vm 25; GFX8910-NEXT: s_endpgm 26; 27; GFX11-LABEL: s_buffer_load_imm: 28; GFX11: ; %bb.0: ; %main_body 29; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4 30; GFX11-NEXT: s_waitcnt lgkmcnt(0) 31; GFX11-NEXT: v_mov_b32_e32 v0, s0 32; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done 33; GFX11-NEXT: s_endpgm 34; 35; 
GFX12-LABEL: s_buffer_load_imm: 36; GFX12: ; %bb.0: ; %main_body 37; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4 38; GFX12-NEXT: s_wait_kmcnt 0x0 39; GFX12-NEXT: v_mov_b32_e32 v0, s0 40; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done 41; GFX12-NEXT: s_endpgm 42main_body: 43 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0) 44 %bitcast = bitcast i32 %load to float 45 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true) 46 ret void 47} 48 49define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) { 50; GFX678-LABEL: s_buffer_load_index: 51; GFX678: ; %bb.0: ; %main_body 52; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4 53; GFX678-NEXT: s_waitcnt lgkmcnt(0) 54; GFX678-NEXT: v_mov_b32_e32 v0, s0 55; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm 56; GFX678-NEXT: s_endpgm 57; 58; GFX910-LABEL: s_buffer_load_index: 59; GFX910: ; %bb.0: ; %main_body 60; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 61; GFX910-NEXT: s_waitcnt lgkmcnt(0) 62; GFX910-NEXT: v_mov_b32_e32 v0, s0 63; GFX910-NEXT: exp mrt0 v0, v0, v0, v0 done vm 64; GFX910-NEXT: s_endpgm 65; 66; GFX11-LABEL: s_buffer_load_index: 67; GFX11: ; %bb.0: ; %main_body 68; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 69; GFX11-NEXT: s_waitcnt lgkmcnt(0) 70; GFX11-NEXT: v_mov_b32_e32 v0, s0 71; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done 72; GFX11-NEXT: s_endpgm 73; 74; GFX12-LABEL: s_buffer_load_index: 75; GFX12: ; %bb.0: ; %main_body 76; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 77; GFX12-NEXT: s_wait_kmcnt 0x0 78; GFX12-NEXT: v_mov_b32_e32 v0, s0 79; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done 80; GFX12-NEXT: s_endpgm 81main_body: 82 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0) 83 %bitcast = bitcast i32 %load to float 84 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 
true, i1 true) 85 ret void 86} 87 88define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) { 89; GFX678910-LABEL: s_buffer_load_index_divergent: 90; GFX678910: ; %bb.0: ; %main_body 91; GFX678910-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 92; GFX678910-NEXT: s_waitcnt vmcnt(0) 93; GFX678910-NEXT: exp mrt0 v0, v0, v0, v0 done vm 94; GFX678910-NEXT: s_endpgm 95; 96; GFX11-LABEL: s_buffer_load_index_divergent: 97; GFX11: ; %bb.0: ; %main_body 98; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen 99; GFX11-NEXT: s_waitcnt vmcnt(0) 100; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done 101; GFX11-NEXT: s_endpgm 102; 103; GFX12-LABEL: s_buffer_load_index_divergent: 104; GFX12: ; %bb.0: ; %main_body 105; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen 106; GFX12-NEXT: s_wait_loadcnt 0x0 107; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done 108; GFX12-NEXT: s_endpgm 109main_body: 110 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0) 111 %bitcast = bitcast i32 %load to float 112 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true) 113 ret void 114} 115 116define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) { 117; GFX67-LABEL: s_buffer_loadx2_imm: 118; GFX67: ; %bb.0: ; %main_body 119; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10 120; GFX67-NEXT: s_waitcnt lgkmcnt(0) 121; GFX67-NEXT: v_mov_b32_e32 v0, s0 122; GFX67-NEXT: v_mov_b32_e32 v1, s1 123; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm 124; GFX67-NEXT: s_endpgm 125; 126; GFX8910-LABEL: s_buffer_loadx2_imm: 127; GFX8910: ; %bb.0: ; %main_body 128; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40 129; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 130; GFX8910-NEXT: v_mov_b32_e32 v0, s0 131; GFX8910-NEXT: v_mov_b32_e32 v1, s1 132; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm 133; GFX8910-NEXT: s_endpgm 134; 135; GFX11-LABEL: s_buffer_loadx2_imm: 136; GFX11: ; %bb.0: ; 
%main_body 137; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40 138; GFX11-NEXT: s_waitcnt lgkmcnt(0) 139; GFX11-NEXT: v_mov_b32_e32 v0, s0 140; GFX11-NEXT: v_mov_b32_e32 v1, s1 141; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done 142; GFX11-NEXT: s_endpgm 143; 144; GFX12-LABEL: s_buffer_loadx2_imm: 145; GFX12: ; %bb.0: ; %main_body 146; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40 147; GFX12-NEXT: s_wait_kmcnt 0x0 148; GFX12-NEXT: v_mov_b32_e32 v0, s0 149; GFX12-NEXT: v_mov_b32_e32 v1, s1 150; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done 151; GFX12-NEXT: s_endpgm 152main_body: 153 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0) 154 %bitcast = bitcast <2 x i32> %load to <2 x float> 155 %x = extractelement <2 x float> %bitcast, i32 0 156 %y = extractelement <2 x float> %bitcast, i32 1 157 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true) 158 ret void 159} 160 161define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) { 162; GFX678-LABEL: s_buffer_loadx2_index: 163; GFX678: ; %bb.0: ; %main_body 164; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 165; GFX678-NEXT: s_waitcnt lgkmcnt(0) 166; GFX678-NEXT: v_mov_b32_e32 v0, s0 167; GFX678-NEXT: v_mov_b32_e32 v1, s1 168; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm 169; GFX678-NEXT: s_endpgm 170; 171; GFX910-LABEL: s_buffer_loadx2_index: 172; GFX910: ; %bb.0: ; %main_body 173; GFX910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0 174; GFX910-NEXT: s_waitcnt lgkmcnt(0) 175; GFX910-NEXT: v_mov_b32_e32 v0, s0 176; GFX910-NEXT: v_mov_b32_e32 v1, s1 177; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm 178; GFX910-NEXT: s_endpgm 179; 180; GFX11-LABEL: s_buffer_loadx2_index: 181; GFX11: ; %bb.0: ; %main_body 182; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0 183; GFX11-NEXT: s_waitcnt lgkmcnt(0) 184; GFX11-NEXT: v_mov_b32_e32 v0, s0 185; GFX11-NEXT: v_mov_b32_e32 v1, s1 
186; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done 187; GFX11-NEXT: s_endpgm 188; 189; GFX12-LABEL: s_buffer_loadx2_index: 190; GFX12: ; %bb.0: ; %main_body 191; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0 192; GFX12-NEXT: s_wait_kmcnt 0x0 193; GFX12-NEXT: v_mov_b32_e32 v0, s0 194; GFX12-NEXT: v_mov_b32_e32 v1, s1 195; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done 196; GFX12-NEXT: s_endpgm 197main_body: 198 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0) 199 %bitcast = bitcast <2 x i32> %load to <2 x float> 200 %x = extractelement <2 x float> %bitcast, i32 0 201 %y = extractelement <2 x float> %bitcast, i32 1 202 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true) 203 ret void 204} 205 206define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) { 207; GFX678910-LABEL: s_buffer_loadx2_index_divergent: 208; GFX678910: ; %bb.0: ; %main_body 209; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen 210; GFX678910-NEXT: s_waitcnt vmcnt(0) 211; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm 212; GFX678910-NEXT: s_endpgm 213; 214; GFX11-LABEL: s_buffer_loadx2_index_divergent: 215; GFX11: ; %bb.0: ; %main_body 216; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen 217; GFX11-NEXT: s_waitcnt vmcnt(0) 218; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done 219; GFX11-NEXT: s_endpgm 220; 221; GFX12-LABEL: s_buffer_loadx2_index_divergent: 222; GFX12: ; %bb.0: ; %main_body 223; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen 224; GFX12-NEXT: s_wait_loadcnt 0x0 225; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done 226; GFX12-NEXT: s_endpgm 227main_body: 228 %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0) 229 %bitcast = bitcast <2 x i32> %load to <2 x float> 230 %x = extractelement <2 x float> %bitcast, i32 0 231 %y = extractelement <2 x float> %bitcast, i32 1 232 call void 
@llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true) 233 ret void 234} 235 236define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) { 237; GFX67-LABEL: s_buffer_loadx3_imm: 238; GFX67: ; %bb.0: ; %main_body 239; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10 240; GFX67-NEXT: s_waitcnt lgkmcnt(0) 241; GFX67-NEXT: v_mov_b32_e32 v0, s0 242; GFX67-NEXT: v_mov_b32_e32 v1, s1 243; GFX67-NEXT: v_mov_b32_e32 v2, s2 244; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm 245; GFX67-NEXT: s_endpgm 246; 247; GFX8910-LABEL: s_buffer_loadx3_imm: 248; GFX8910: ; %bb.0: ; %main_body 249; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40 250; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 251; GFX8910-NEXT: v_mov_b32_e32 v0, s0 252; GFX8910-NEXT: v_mov_b32_e32 v1, s1 253; GFX8910-NEXT: v_mov_b32_e32 v2, s2 254; GFX8910-NEXT: exp mrt0 v0, v1, v2, v0 done vm 255; GFX8910-NEXT: s_endpgm 256; 257; GFX11-LABEL: s_buffer_loadx3_imm: 258; GFX11: ; %bb.0: ; %main_body 259; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x40 260; GFX11-NEXT: s_waitcnt lgkmcnt(0) 261; GFX11-NEXT: v_mov_b32_e32 v0, s0 262; GFX11-NEXT: v_mov_b32_e32 v1, s1 263; GFX11-NEXT: v_mov_b32_e32 v2, s2 264; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done 265; GFX11-NEXT: s_endpgm 266; 267; GFX12-LABEL: s_buffer_loadx3_imm: 268; GFX12: ; %bb.0: ; %main_body 269; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], 0x40 270; GFX12-NEXT: s_wait_kmcnt 0x0 271; GFX12-NEXT: v_mov_b32_e32 v0, s0 272; GFX12-NEXT: v_mov_b32_e32 v1, s1 273; GFX12-NEXT: v_mov_b32_e32 v2, s2 274; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done 275; GFX12-NEXT: s_endpgm 276main_body: 277 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0) 278 %bitcast = bitcast <3 x i32> %load to <3 x float> 279 %x = extractelement <3 x float> %bitcast, i32 0 280 %y = extractelement <3 x float> %bitcast, i32 1 281 %z = extractelement <3 x float> %bitcast, i32 2 282 call void 
@llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true) 283 ret void 284} 285 286define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) { 287; GFX678-LABEL: s_buffer_loadx3_index: 288; GFX678: ; %bb.0: ; %main_body 289; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 290; GFX678-NEXT: s_waitcnt lgkmcnt(0) 291; GFX678-NEXT: v_mov_b32_e32 v0, s0 292; GFX678-NEXT: v_mov_b32_e32 v1, s1 293; GFX678-NEXT: v_mov_b32_e32 v2, s2 294; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm 295; GFX678-NEXT: s_endpgm 296; 297; GFX910-LABEL: s_buffer_loadx3_index: 298; GFX910: ; %bb.0: ; %main_body 299; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0 300; GFX910-NEXT: s_waitcnt lgkmcnt(0) 301; GFX910-NEXT: v_mov_b32_e32 v0, s0 302; GFX910-NEXT: v_mov_b32_e32 v1, s1 303; GFX910-NEXT: v_mov_b32_e32 v2, s2 304; GFX910-NEXT: exp mrt0 v0, v1, v2, v0 done vm 305; GFX910-NEXT: s_endpgm 306; 307; GFX11-LABEL: s_buffer_loadx3_index: 308; GFX11: ; %bb.0: ; %main_body 309; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0 310; GFX11-NEXT: s_waitcnt lgkmcnt(0) 311; GFX11-NEXT: v_mov_b32_e32 v0, s0 312; GFX11-NEXT: v_mov_b32_e32 v1, s1 313; GFX11-NEXT: v_mov_b32_e32 v2, s2 314; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done 315; GFX11-NEXT: s_endpgm 316; 317; GFX12-LABEL: s_buffer_loadx3_index: 318; GFX12: ; %bb.0: ; %main_body 319; GFX12-NEXT: s_buffer_load_b96 s[0:2], s[0:3], s4 offset:0x0 320; GFX12-NEXT: s_wait_kmcnt 0x0 321; GFX12-NEXT: v_mov_b32_e32 v0, s0 322; GFX12-NEXT: v_mov_b32_e32 v1, s1 323; GFX12-NEXT: v_mov_b32_e32 v2, s2 324; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done 325; GFX12-NEXT: s_endpgm 326main_body: 327 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0) 328 %bitcast = bitcast <3 x i32> %load to <3 x float> 329 %x = extractelement <3 x float> %bitcast, i32 0 330 %y = extractelement <3 x float> %bitcast, i32 1 331 %z = extractelement <3 x 
float> %bitcast, i32 2 332 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true) 333 ret void 334} 335 336define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) { 337; GFX6-LABEL: s_buffer_loadx3_index_divergent: 338; GFX6: ; %bb.0: ; %main_body 339; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen 340; GFX6-NEXT: s_waitcnt vmcnt(0) 341; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm 342; GFX6-NEXT: s_endpgm 343; 344; GFX78910-LABEL: s_buffer_loadx3_index_divergent: 345; GFX78910: ; %bb.0: ; %main_body 346; GFX78910-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen 347; GFX78910-NEXT: s_waitcnt vmcnt(0) 348; GFX78910-NEXT: exp mrt0 v0, v1, v2, v0 done vm 349; GFX78910-NEXT: s_endpgm 350; 351; GFX11-LABEL: s_buffer_loadx3_index_divergent: 352; GFX11: ; %bb.0: ; %main_body 353; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen 354; GFX11-NEXT: s_waitcnt vmcnt(0) 355; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done 356; GFX11-NEXT: s_endpgm 357; 358; GFX12-LABEL: s_buffer_loadx3_index_divergent: 359; GFX12: ; %bb.0: ; %main_body 360; GFX12-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], null offen 361; GFX12-NEXT: s_wait_loadcnt 0x0 362; GFX12-NEXT: export mrt0 v0, v1, v2, v0 done 363; GFX12-NEXT: s_endpgm 364main_body: 365 %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0) 366 %bitcast = bitcast <3 x i32> %load to <3 x float> 367 %x = extractelement <3 x float> %bitcast, i32 0 368 %y = extractelement <3 x float> %bitcast, i32 1 369 %z = extractelement <3 x float> %bitcast, i32 2 370 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true) 371 ret void 372} 373 374define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) { 375; GFX67-LABEL: s_buffer_loadx4_imm: 376; GFX67: ; %bb.0: ; %main_body 377; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32 378; GFX67-NEXT: s_waitcnt 
lgkmcnt(0) 379; GFX67-NEXT: v_mov_b32_e32 v0, s0 380; GFX67-NEXT: v_mov_b32_e32 v1, s1 381; GFX67-NEXT: v_mov_b32_e32 v2, s2 382; GFX67-NEXT: v_mov_b32_e32 v3, s3 383; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm 384; GFX67-NEXT: s_endpgm 385; 386; GFX8910-LABEL: s_buffer_loadx4_imm: 387; GFX8910: ; %bb.0: ; %main_body 388; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8 389; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 390; GFX8910-NEXT: v_mov_b32_e32 v0, s0 391; GFX8910-NEXT: v_mov_b32_e32 v1, s1 392; GFX8910-NEXT: v_mov_b32_e32 v2, s2 393; GFX8910-NEXT: v_mov_b32_e32 v3, s3 394; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm 395; GFX8910-NEXT: s_endpgm 396; 397; GFX11-LABEL: s_buffer_loadx4_imm: 398; GFX11: ; %bb.0: ; %main_body 399; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8 400; GFX11-NEXT: s_waitcnt lgkmcnt(0) 401; GFX11-NEXT: v_mov_b32_e32 v0, s0 402; GFX11-NEXT: v_mov_b32_e32 v1, s1 403; GFX11-NEXT: v_mov_b32_e32 v2, s2 404; GFX11-NEXT: v_mov_b32_e32 v3, s3 405; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 406; GFX11-NEXT: s_endpgm 407; 408; GFX12-LABEL: s_buffer_loadx4_imm: 409; GFX12: ; %bb.0: ; %main_body 410; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8 411; GFX12-NEXT: s_wait_kmcnt 0x0 412; GFX12-NEXT: v_mov_b32_e32 v0, s0 413; GFX12-NEXT: v_mov_b32_e32 v1, s1 414; GFX12-NEXT: v_mov_b32_e32 v2, s2 415; GFX12-NEXT: v_mov_b32_e32 v3, s3 416; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done 417; GFX12-NEXT: s_endpgm 418main_body: 419 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0) 420 %bitcast = bitcast <4 x i32> %load to <4 x float> 421 %x = extractelement <4 x float> %bitcast, i32 0 422 %y = extractelement <4 x float> %bitcast, i32 1 423 %z = extractelement <4 x float> %bitcast, i32 2 424 %w = extractelement <4 x float> %bitcast, i32 3 425 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true) 426 ret void 427} 428 429define amdgpu_ps void 
@s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) { 430; GFX678-LABEL: s_buffer_loadx4_index: 431; GFX678: ; %bb.0: ; %main_body 432; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 433; GFX678-NEXT: s_waitcnt lgkmcnt(0) 434; GFX678-NEXT: v_mov_b32_e32 v0, s0 435; GFX678-NEXT: v_mov_b32_e32 v1, s1 436; GFX678-NEXT: v_mov_b32_e32 v2, s2 437; GFX678-NEXT: v_mov_b32_e32 v3, s3 438; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm 439; GFX678-NEXT: s_endpgm 440; 441; GFX910-LABEL: s_buffer_loadx4_index: 442; GFX910: ; %bb.0: ; %main_body 443; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0 444; GFX910-NEXT: s_waitcnt lgkmcnt(0) 445; GFX910-NEXT: v_mov_b32_e32 v0, s0 446; GFX910-NEXT: v_mov_b32_e32 v1, s1 447; GFX910-NEXT: v_mov_b32_e32 v2, s2 448; GFX910-NEXT: v_mov_b32_e32 v3, s3 449; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm 450; GFX910-NEXT: s_endpgm 451; 452; GFX11-LABEL: s_buffer_loadx4_index: 453; GFX11: ; %bb.0: ; %main_body 454; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0 455; GFX11-NEXT: s_waitcnt lgkmcnt(0) 456; GFX11-NEXT: v_mov_b32_e32 v0, s0 457; GFX11-NEXT: v_mov_b32_e32 v1, s1 458; GFX11-NEXT: v_mov_b32_e32 v2, s2 459; GFX11-NEXT: v_mov_b32_e32 v3, s3 460; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 461; GFX11-NEXT: s_endpgm 462; 463; GFX12-LABEL: s_buffer_loadx4_index: 464; GFX12: ; %bb.0: ; %main_body 465; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0 466; GFX12-NEXT: s_wait_kmcnt 0x0 467; GFX12-NEXT: v_mov_b32_e32 v0, s0 468; GFX12-NEXT: v_mov_b32_e32 v1, s1 469; GFX12-NEXT: v_mov_b32_e32 v2, s2 470; GFX12-NEXT: v_mov_b32_e32 v3, s3 471; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done 472; GFX12-NEXT: s_endpgm 473main_body: 474 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0) 475 %bitcast = bitcast <4 x i32> %load to <4 x float> 476 %x = extractelement <4 x float> %bitcast, i32 0 477 %y = extractelement <4 x float> %bitcast, i32 1 478 %z = 
extractelement <4 x float> %bitcast, i32 2 479 %w = extractelement <4 x float> %bitcast, i32 3 480 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true) 481 ret void 482} 483 484define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) { 485; GFX678910-LABEL: s_buffer_loadx4_index_divergent: 486; GFX678910: ; %bb.0: ; %main_body 487; GFX678910-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen 488; GFX678910-NEXT: s_waitcnt vmcnt(0) 489; GFX678910-NEXT: exp mrt0 v0, v1, v2, v3 done vm 490; GFX678910-NEXT: s_endpgm 491; 492; GFX11-LABEL: s_buffer_loadx4_index_divergent: 493; GFX11: ; %bb.0: ; %main_body 494; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen 495; GFX11-NEXT: s_waitcnt vmcnt(0) 496; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 497; GFX11-NEXT: s_endpgm 498; 499; GFX12-LABEL: s_buffer_loadx4_index_divergent: 500; GFX12: ; %bb.0: ; %main_body 501; GFX12-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], null offen 502; GFX12-NEXT: s_wait_loadcnt 0x0 503; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done 504; GFX12-NEXT: s_endpgm 505main_body: 506 %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0) 507 %bitcast = bitcast <4 x i32> %load to <4 x float> 508 %x = extractelement <4 x float> %bitcast, i32 0 509 %y = extractelement <4 x float> %bitcast, i32 1 510 %z = extractelement <4 x float> %bitcast, i32 2 511 %w = extractelement <4 x float> %bitcast, i32 3 512 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true) 513 ret void 514} 515 516define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) { 517; GFX67-LABEL: s_buffer_load_imm_mergex2: 518; GFX67: ; %bb.0: ; %main_body 519; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1 520; GFX67-NEXT: s_waitcnt lgkmcnt(0) 521; GFX67-NEXT: v_mov_b32_e32 v0, s0 522; GFX67-NEXT: v_mov_b32_e32 v1, s1 523; GFX67-NEXT: exp mrt0 v0, v1, v0, 
v0 done vm 524; GFX67-NEXT: s_endpgm 525; 526; GFX8-LABEL: s_buffer_load_imm_mergex2: 527; GFX8: ; %bb.0: ; %main_body 528; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4 529; GFX8-NEXT: s_waitcnt lgkmcnt(0) 530; GFX8-NEXT: v_mov_b32_e32 v0, s0 531; GFX8-NEXT: v_mov_b32_e32 v1, s1 532; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm 533; GFX8-NEXT: s_endpgm 534; 535; GFX910-LABEL: s_buffer_load_imm_mergex2: 536; GFX910: ; %bb.0: ; %main_body 537; GFX910-NEXT: s_buffer_load_dwordx2 s[4:5], s[0:3], 0x4 538; GFX910-NEXT: s_waitcnt lgkmcnt(0) 539; GFX910-NEXT: v_mov_b32_e32 v0, s4 540; GFX910-NEXT: v_mov_b32_e32 v1, s5 541; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm 542; GFX910-NEXT: s_endpgm 543; 544; GFX11-LABEL: s_buffer_load_imm_mergex2: 545; GFX11: ; %bb.0: ; %main_body 546; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4 547; GFX11-NEXT: s_waitcnt lgkmcnt(0) 548; GFX11-NEXT: v_mov_b32_e32 v0, s0 549; GFX11-NEXT: v_mov_b32_e32 v1, s1 550; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done 551; GFX11-NEXT: s_endpgm 552; 553; GFX12-LABEL: s_buffer_load_imm_mergex2: 554; GFX12: ; %bb.0: ; %main_body 555; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4 556; GFX12-NEXT: s_wait_kmcnt 0x0 557; GFX12-NEXT: v_mov_b32_e32 v0, s0 558; GFX12-NEXT: v_mov_b32_e32 v1, s1 559; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done 560; GFX12-NEXT: s_endpgm 561main_body: 562 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0) 563 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0) 564 %x = bitcast i32 %load0 to float 565 %y = bitcast i32 %load1 to float 566 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true) 567 ret void 568} 569 570define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) { 571; GFX67-LABEL: s_buffer_load_imm_mergex4: 572; GFX67: ; %bb.0: ; %main_body 573; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2 574; GFX67-NEXT: s_waitcnt lgkmcnt(0) 575; 
GFX67-NEXT: v_mov_b32_e32 v0, s0 576; GFX67-NEXT: v_mov_b32_e32 v1, s1 577; GFX67-NEXT: v_mov_b32_e32 v2, s2 578; GFX67-NEXT: v_mov_b32_e32 v3, s3 579; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm 580; GFX67-NEXT: s_endpgm 581; 582; GFX8-LABEL: s_buffer_load_imm_mergex4: 583; GFX8: ; %bb.0: ; %main_body 584; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8 585; GFX8-NEXT: s_waitcnt lgkmcnt(0) 586; GFX8-NEXT: v_mov_b32_e32 v0, s0 587; GFX8-NEXT: v_mov_b32_e32 v1, s1 588; GFX8-NEXT: v_mov_b32_e32 v2, s2 589; GFX8-NEXT: v_mov_b32_e32 v3, s3 590; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm 591; GFX8-NEXT: s_endpgm 592; 593; GFX910-LABEL: s_buffer_load_imm_mergex4: 594; GFX910: ; %bb.0: ; %main_body 595; GFX910-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x8 596; GFX910-NEXT: s_waitcnt lgkmcnt(0) 597; GFX910-NEXT: v_mov_b32_e32 v0, s4 598; GFX910-NEXT: v_mov_b32_e32 v1, s5 599; GFX910-NEXT: v_mov_b32_e32 v2, s6 600; GFX910-NEXT: v_mov_b32_e32 v3, s7 601; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm 602; GFX910-NEXT: s_endpgm 603; 604; GFX11-LABEL: s_buffer_load_imm_mergex4: 605; GFX11: ; %bb.0: ; %main_body 606; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8 607; GFX11-NEXT: s_waitcnt lgkmcnt(0) 608; GFX11-NEXT: v_mov_b32_e32 v0, s0 609; GFX11-NEXT: v_mov_b32_e32 v1, s1 610; GFX11-NEXT: v_mov_b32_e32 v2, s2 611; GFX11-NEXT: v_mov_b32_e32 v3, s3 612; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 613; GFX11-NEXT: s_endpgm 614; 615; GFX12-LABEL: s_buffer_load_imm_mergex4: 616; GFX12: ; %bb.0: ; %main_body 617; GFX12-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8 618; GFX12-NEXT: s_wait_kmcnt 0x0 619; GFX12-NEXT: v_mov_b32_e32 v0, s0 620; GFX12-NEXT: v_mov_b32_e32 v1, s1 621; GFX12-NEXT: v_mov_b32_e32 v2, s2 622; GFX12-NEXT: v_mov_b32_e32 v3, s3 623; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done 624; GFX12-NEXT: s_endpgm 625main_body: 626 %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0) 627 %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x 
i32> %desc, i32 12, i32 0) 628 %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0) 629 %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0) 630 %x = bitcast i32 %load0 to float 631 %y = bitcast i32 %load1 to float 632 %z = bitcast i32 %load2 to float 633 %w = bitcast i32 %load3 to float 634 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true) 635 ret void 636} 637 638@gv = external addrspace(1) global i32 639 640define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) { 641; GFX6-LABEL: s_buffer_load_index_across_bb: 642; GFX6: ; %bb.0: ; %main_body 643; GFX6-NEXT: s_getpc_b64 s[4:5] 644; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 645; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 646; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 647; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 648; GFX6-NEXT: s_mov_b32 s7, 0xf000 649; GFX6-NEXT: s_mov_b32 s6, -1 650; GFX6-NEXT: s_waitcnt lgkmcnt(0) 651; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 652; GFX6-NEXT: s_waitcnt expcnt(0) 653; GFX6-NEXT: v_or_b32_e32 v0, 8, v0 654; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 655; GFX6-NEXT: s_waitcnt vmcnt(0) 656; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm 657; GFX6-NEXT: s_endpgm 658; 659; GFX7-LABEL: s_buffer_load_index_across_bb: 660; GFX7: ; %bb.0: ; %main_body 661; GFX7-NEXT: s_getpc_b64 s[4:5] 662; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 663; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 664; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 665; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 666; GFX7-NEXT: s_mov_b32 s7, 0xf000 667; GFX7-NEXT: s_mov_b32 s6, -1 668; GFX7-NEXT: s_waitcnt lgkmcnt(0) 669; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 670; GFX7-NEXT: v_or_b32_e32 v0, 8, v0 671; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 672; GFX7-NEXT: s_waitcnt vmcnt(0) 673; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm 
674; GFX7-NEXT: s_endpgm 675; 676; GFX8-LABEL: s_buffer_load_index_across_bb: 677; GFX8: ; %bb.0: ; %main_body 678; GFX8-NEXT: s_getpc_b64 s[4:5] 679; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 680; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 681; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 682; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0 683; GFX8-NEXT: s_waitcnt lgkmcnt(0) 684; GFX8-NEXT: v_mov_b32_e32 v1, s4 685; GFX8-NEXT: v_mov_b32_e32 v2, s5 686; GFX8-NEXT: flat_store_dword v[1:2], v0 687; GFX8-NEXT: v_or_b32_e32 v0, 8, v0 688; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 689; GFX8-NEXT: s_waitcnt vmcnt(0) 690; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm 691; GFX8-NEXT: s_endpgm 692; 693; GFX9-LABEL: s_buffer_load_index_across_bb: 694; GFX9: ; %bb.0: ; %main_body 695; GFX9-NEXT: s_getpc_b64 s[4:5] 696; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 697; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 698; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 699; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0 700; GFX9-NEXT: v_mov_b32_e32 v1, 0 701; GFX9-NEXT: s_waitcnt lgkmcnt(0) 702; GFX9-NEXT: global_store_dword v1, v0, s[4:5] 703; GFX9-NEXT: v_or_b32_e32 v0, 8, v0 704; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 705; GFX9-NEXT: s_waitcnt vmcnt(0) 706; GFX9-NEXT: exp mrt0 v0, v0, v0, v0 done vm 707; GFX9-NEXT: s_endpgm 708; 709; GFX10-LABEL: s_buffer_load_index_across_bb: 710; GFX10: ; %bb.0: ; %main_body 711; GFX10-NEXT: s_getpc_b64 s[4:5] 712; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 713; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 714; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0 715; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 716; GFX10-NEXT: v_mov_b32_e32 v1, 0 717; GFX10-NEXT: s_waitcnt lgkmcnt(0) 718; GFX10-NEXT: global_store_dword v1, v0, s[4:5] 719; GFX10-NEXT: v_or_b32_e32 v0, 8, v0 720; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen 721; GFX10-NEXT: s_waitcnt vmcnt(0) 722; GFX10-NEXT: exp mrt0 v0, v0, v0, v0 done 
vm 723; GFX10-NEXT: s_endpgm 724; 725; GFX11-LABEL: s_buffer_load_index_across_bb: 726; GFX11: ; %bb.0: ; %main_body 727; GFX11-NEXT: s_getpc_b64 s[4:5] 728; GFX11-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 729; GFX11-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 730; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0 731; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 732; GFX11-NEXT: v_mov_b32_e32 v1, 0 733; GFX11-NEXT: s_waitcnt lgkmcnt(0) 734; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] 735; GFX11-NEXT: v_or_b32_e32 v0, 8, v0 736; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen 737; GFX11-NEXT: s_waitcnt vmcnt(0) 738; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done 739; GFX11-NEXT: s_endpgm 740; 741; GFX12-LABEL: s_buffer_load_index_across_bb: 742; GFX12: ; %bb.0: ; %main_body 743; GFX12-NEXT: s_getpc_b64 s[4:5] 744; GFX12-NEXT: s_sext_i32_i16 s5, s5 745; GFX12-NEXT: s_add_co_u32 s4, s4, gv@gotpcrel32@lo+8 746; GFX12-NEXT: s_add_co_ci_u32 s5, s5, gv@gotpcrel32@hi+16 747; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 748; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 749; GFX12-NEXT: v_mov_b32_e32 v1, 0 750; GFX12-NEXT: s_wait_kmcnt 0x0 751; GFX12-NEXT: global_store_b32 v1, v0, s[4:5] 752; GFX12-NEXT: v_or_b32_e32 v0, 8, v0 753; GFX12-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen 754; GFX12-NEXT: s_wait_loadcnt 0x0 755; GFX12-NEXT: export mrt0 v0, v0, v0, v0 done 756; GFX12-NEXT: s_endpgm 757main_body: 758 %tmp = shl i32 %index, 4 759 store i32 %tmp, ptr addrspace(1) @gv 760 br label %bb1 761 762bb1: ; preds = %main_body 763 %tmp1 = or i32 %tmp, 8 764 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0) 765 %bitcast = bitcast i32 %load to float 766 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true) 767 ret void 768} 769 770define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) { 771; GFX678910-LABEL: s_buffer_load_index_across_bb_merged: 772; 
GFX678910: ; %bb.0: ; %main_body 773; GFX678910-NEXT: v_lshlrev_b32_e32 v0, 4, v0 774; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8 775; GFX678910-NEXT: s_waitcnt vmcnt(0) 776; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm 777; GFX678910-NEXT: s_endpgm 778; 779; GFX11-LABEL: s_buffer_load_index_across_bb_merged: 780; GFX11: ; %bb.0: ; %main_body 781; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0 782; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8 783; GFX11-NEXT: s_waitcnt vmcnt(0) 784; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done 785; GFX11-NEXT: s_endpgm 786; 787; GFX12-LABEL: s_buffer_load_index_across_bb_merged: 788; GFX12: ; %bb.0: ; %main_body 789; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 790; GFX12-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], null offen offset:8 791; GFX12-NEXT: s_wait_loadcnt 0x0 792; GFX12-NEXT: export mrt0 v0, v1, v0, v0 done 793; GFX12-NEXT: s_endpgm 794main_body: 795 %tmp = shl i32 %index, 4 796 br label %bb1 797 798bb1: ; preds = %main_body 799 %tmp1 = or i32 %tmp, 8 800 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0) 801 %tmp2 = or i32 %tmp1, 4 802 %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0) 803 %bitcast = bitcast i32 %load to float 804 %bitcast2 = bitcast i32 %load2 to float 805 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true) 806 ret void 807} 808 809define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) { 810; GFX6-LABEL: s_buffer_load_imm_neg1: 811; GFX6: ; %bb.0: 812; GFX6-NEXT: s_mov_b32 s4, -1 813; GFX6-NEXT: s_nop 3 814; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 815; GFX6-NEXT: s_waitcnt lgkmcnt(0) 816; GFX6-NEXT: ; return to shader part epilog 817; 818; GFX78-LABEL: s_buffer_load_imm_neg1: 819; GFX78: ; %bb.0: 820; GFX78-NEXT: s_mov_b32 s4, -1 821; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4 822; GFX78-NEXT: s_waitcnt 
lgkmcnt(0) 823; GFX78-NEXT: ; return to shader part epilog 824; 825; GFX910-LABEL: s_buffer_load_imm_neg1: 826; GFX910: ; %bb.0: 827; GFX910-NEXT: s_mov_b32 s4, -1 828; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 829; GFX910-NEXT: s_waitcnt lgkmcnt(0) 830; GFX910-NEXT: ; return to shader part epilog 831; 832; GFX11-LABEL: s_buffer_load_imm_neg1: 833; GFX11: ; %bb.0: 834; GFX11-NEXT: s_mov_b32 s4, -1 835; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 836; GFX11-NEXT: s_waitcnt lgkmcnt(0) 837; GFX11-NEXT: ; return to shader part epilog 838; 839; GFX12-LABEL: s_buffer_load_imm_neg1: 840; GFX12: ; %bb.0: 841; GFX12-NEXT: s_mov_b32 s4, -1 842; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 843; GFX12-NEXT: s_wait_kmcnt 0x0 844; GFX12-NEXT: ; return to shader part epilog 845 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) 846 ret i32 %load 847} 848 849define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) { 850; GFX6-LABEL: s_buffer_load_imm_neg4: 851; GFX6: ; %bb.0: 852; GFX6-NEXT: s_mov_b32 s4, -4 853; GFX6-NEXT: s_nop 3 854; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 855; GFX6-NEXT: s_waitcnt lgkmcnt(0) 856; GFX6-NEXT: ; return to shader part epilog 857; 858; GFX7-LABEL: s_buffer_load_imm_neg4: 859; GFX7: ; %bb.0: 860; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff 861; GFX7-NEXT: s_waitcnt lgkmcnt(0) 862; GFX7-NEXT: ; return to shader part epilog 863; 864; GFX8-LABEL: s_buffer_load_imm_neg4: 865; GFX8: ; %bb.0: 866; GFX8-NEXT: s_mov_b32 s4, -4 867; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 868; GFX8-NEXT: s_waitcnt lgkmcnt(0) 869; GFX8-NEXT: ; return to shader part epilog 870; 871; GFX910-LABEL: s_buffer_load_imm_neg4: 872; GFX910: ; %bb.0: 873; GFX910-NEXT: s_mov_b32 s4, -4 874; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 875; GFX910-NEXT: s_waitcnt lgkmcnt(0) 876; GFX910-NEXT: ; return to shader part epilog 877; 878; GFX11-LABEL: s_buffer_load_imm_neg4: 879; 
GFX11: ; %bb.0: 880; GFX11-NEXT: s_mov_b32 s4, -4 881; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 882; GFX11-NEXT: s_waitcnt lgkmcnt(0) 883; GFX11-NEXT: ; return to shader part epilog 884; 885; GFX12-LABEL: s_buffer_load_imm_neg4: 886; GFX12: ; %bb.0: 887; GFX12-NEXT: s_mov_b32 s4, -4 888; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 889; GFX12-NEXT: s_wait_kmcnt 0x0 890; GFX12-NEXT: ; return to shader part epilog 891 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) 892 ret i32 %load 893} 894 895define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) { 896; GFX6-LABEL: s_buffer_load_imm_neg8: 897; GFX6: ; %bb.0: 898; GFX6-NEXT: s_mov_b32 s4, -8 899; GFX6-NEXT: s_nop 3 900; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 901; GFX6-NEXT: s_waitcnt lgkmcnt(0) 902; GFX6-NEXT: ; return to shader part epilog 903; 904; GFX7-LABEL: s_buffer_load_imm_neg8: 905; GFX7: ; %bb.0: 906; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe 907; GFX7-NEXT: s_waitcnt lgkmcnt(0) 908; GFX7-NEXT: ; return to shader part epilog 909; 910; GFX8-LABEL: s_buffer_load_imm_neg8: 911; GFX8: ; %bb.0: 912; GFX8-NEXT: s_mov_b32 s4, -8 913; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 914; GFX8-NEXT: s_waitcnt lgkmcnt(0) 915; GFX8-NEXT: ; return to shader part epilog 916; 917; GFX910-LABEL: s_buffer_load_imm_neg8: 918; GFX910: ; %bb.0: 919; GFX910-NEXT: s_mov_b32 s4, -8 920; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 921; GFX910-NEXT: s_waitcnt lgkmcnt(0) 922; GFX910-NEXT: ; return to shader part epilog 923; 924; GFX11-LABEL: s_buffer_load_imm_neg8: 925; GFX11: ; %bb.0: 926; GFX11-NEXT: s_mov_b32 s4, -8 927; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 928; GFX11-NEXT: s_waitcnt lgkmcnt(0) 929; GFX11-NEXT: ; return to shader part epilog 930; 931; GFX12-LABEL: s_buffer_load_imm_neg8: 932; GFX12: ; %bb.0: 933; GFX12-NEXT: s_mov_b32 s4, -8 934; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 935; 
GFX12-NEXT: s_wait_kmcnt 0x0 936; GFX12-NEXT: ; return to shader part epilog 937 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) 938 ret i32 %load 939} 940 941define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) { 942; GFX6-LABEL: s_buffer_load_imm_bit31: 943; GFX6: ; %bb.0: 944; GFX6-NEXT: s_brev_b32 s4, 1 945; GFX6-NEXT: s_nop 3 946; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 947; GFX6-NEXT: s_waitcnt lgkmcnt(0) 948; GFX6-NEXT: ; return to shader part epilog 949; 950; GFX7-LABEL: s_buffer_load_imm_bit31: 951; GFX7: ; %bb.0: 952; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000 953; GFX7-NEXT: s_waitcnt lgkmcnt(0) 954; GFX7-NEXT: ; return to shader part epilog 955; 956; GFX8-LABEL: s_buffer_load_imm_bit31: 957; GFX8: ; %bb.0: 958; GFX8-NEXT: s_brev_b32 s4, 1 959; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 960; GFX8-NEXT: s_waitcnt lgkmcnt(0) 961; GFX8-NEXT: ; return to shader part epilog 962; 963; GFX910-LABEL: s_buffer_load_imm_bit31: 964; GFX910: ; %bb.0: 965; GFX910-NEXT: s_brev_b32 s4, 1 966; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 967; GFX910-NEXT: s_waitcnt lgkmcnt(0) 968; GFX910-NEXT: ; return to shader part epilog 969; 970; GFX11-LABEL: s_buffer_load_imm_bit31: 971; GFX11: ; %bb.0: 972; GFX11-NEXT: s_brev_b32 s4, 1 973; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 974; GFX11-NEXT: s_waitcnt lgkmcnt(0) 975; GFX11-NEXT: ; return to shader part epilog 976; 977; GFX12-LABEL: s_buffer_load_imm_bit31: 978; GFX12: ; %bb.0: 979; GFX12-NEXT: s_brev_b32 s4, 1 980; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 981; GFX12-NEXT: s_wait_kmcnt 0x0 982; GFX12-NEXT: ; return to shader part epilog 983 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) 984 ret i32 %load 985} 986 987define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) { 988; GFX6-LABEL: s_buffer_load_imm_bit30: 989; GFX6: ; %bb.0: 990; GFX6-NEXT: s_mov_b32 
s4, 2.0 991; GFX6-NEXT: s_nop 3 992; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 993; GFX6-NEXT: s_waitcnt lgkmcnt(0) 994; GFX6-NEXT: ; return to shader part epilog 995; 996; GFX7-LABEL: s_buffer_load_imm_bit30: 997; GFX7: ; %bb.0: 998; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000 999; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1000; GFX7-NEXT: ; return to shader part epilog 1001; 1002; GFX8-LABEL: s_buffer_load_imm_bit30: 1003; GFX8: ; %bb.0: 1004; GFX8-NEXT: s_mov_b32 s4, 2.0 1005; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1006; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1007; GFX8-NEXT: ; return to shader part epilog 1008; 1009; GFX910-LABEL: s_buffer_load_imm_bit30: 1010; GFX910: ; %bb.0: 1011; GFX910-NEXT: s_mov_b32 s4, 2.0 1012; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1013; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1014; GFX910-NEXT: ; return to shader part epilog 1015; 1016; GFX11-LABEL: s_buffer_load_imm_bit30: 1017; GFX11: ; %bb.0: 1018; GFX11-NEXT: s_mov_b32 s4, 2.0 1019; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1020; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1021; GFX11-NEXT: ; return to shader part epilog 1022; 1023; GFX12-LABEL: s_buffer_load_imm_bit30: 1024; GFX12: ; %bb.0: 1025; GFX12-NEXT: s_mov_b32 s4, 2.0 1026; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1027; GFX12-NEXT: s_wait_kmcnt 0x0 1028; GFX12-NEXT: ; return to shader part epilog 1029 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0) 1030 ret i32 %load 1031} 1032 1033define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) { 1034; GFX6-LABEL: s_buffer_load_imm_bit29: 1035; GFX6: ; %bb.0: 1036; GFX6-NEXT: s_brev_b32 s4, 4 1037; GFX6-NEXT: s_nop 3 1038; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1039; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1040; GFX6-NEXT: ; return to shader part epilog 1041; 1042; GFX7-LABEL: s_buffer_load_imm_bit29: 1043; GFX7: ; %bb.0: 1044; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000 1045; 
GFX7-NEXT: s_waitcnt lgkmcnt(0) 1046; GFX7-NEXT: ; return to shader part epilog 1047; 1048; GFX8-LABEL: s_buffer_load_imm_bit29: 1049; GFX8: ; %bb.0: 1050; GFX8-NEXT: s_brev_b32 s4, 4 1051; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1052; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1053; GFX8-NEXT: ; return to shader part epilog 1054; 1055; GFX910-LABEL: s_buffer_load_imm_bit29: 1056; GFX910: ; %bb.0: 1057; GFX910-NEXT: s_brev_b32 s4, 4 1058; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1059; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1060; GFX910-NEXT: ; return to shader part epilog 1061; 1062; GFX11-LABEL: s_buffer_load_imm_bit29: 1063; GFX11: ; %bb.0: 1064; GFX11-NEXT: s_brev_b32 s4, 4 1065; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1066; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1067; GFX11-NEXT: ; return to shader part epilog 1068; 1069; GFX12-LABEL: s_buffer_load_imm_bit29: 1070; GFX12: ; %bb.0: 1071; GFX12-NEXT: s_brev_b32 s4, 4 1072; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1073; GFX12-NEXT: s_wait_kmcnt 0x0 1074; GFX12-NEXT: ; return to shader part epilog 1075 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) 1076 ret i32 %load 1077} 1078 1079define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) { 1080; GFX6-LABEL: s_buffer_load_imm_bit21: 1081; GFX6: ; %bb.0: 1082; GFX6-NEXT: s_mov_b32 s4, 0x200000 1083; GFX6-NEXT: s_nop 3 1084; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1085; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1086; GFX6-NEXT: ; return to shader part epilog 1087; 1088; GFX7-LABEL: s_buffer_load_imm_bit21: 1089; GFX7: ; %bb.0: 1090; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000 1091; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1092; GFX7-NEXT: ; return to shader part epilog 1093; 1094; GFX8-LABEL: s_buffer_load_imm_bit21: 1095; GFX8: ; %bb.0: 1096; GFX8-NEXT: s_mov_b32 s4, 0x200000 1097; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1098; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1099; GFX8-NEXT: ; return 
to shader part epilog 1100; 1101; GFX910-LABEL: s_buffer_load_imm_bit21: 1102; GFX910: ; %bb.0: 1103; GFX910-NEXT: s_mov_b32 s4, 0x200000 1104; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1105; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1106; GFX910-NEXT: ; return to shader part epilog 1107; 1108; GFX11-LABEL: s_buffer_load_imm_bit21: 1109; GFX11: ; %bb.0: 1110; GFX11-NEXT: s_mov_b32 s4, 0x200000 1111; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1112; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1113; GFX11-NEXT: ; return to shader part epilog 1114; 1115; GFX12-LABEL: s_buffer_load_imm_bit21: 1116; GFX12: ; %bb.0: 1117; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x200000 1118; GFX12-NEXT: s_wait_kmcnt 0x0 1119; GFX12-NEXT: ; return to shader part epilog 1120 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) 1121 ret i32 %load 1122} 1123 1124define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) { 1125; GFX6-LABEL: s_buffer_load_imm_bit20: 1126; GFX6: ; %bb.0: 1127; GFX6-NEXT: s_mov_b32 s4, 0x100000 1128; GFX6-NEXT: s_nop 3 1129; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1130; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1131; GFX6-NEXT: ; return to shader part epilog 1132; 1133; GFX7-LABEL: s_buffer_load_imm_bit20: 1134; GFX7: ; %bb.0: 1135; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000 1136; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1137; GFX7-NEXT: ; return to shader part epilog 1138; 1139; GFX8-LABEL: s_buffer_load_imm_bit20: 1140; GFX8: ; %bb.0: 1141; GFX8-NEXT: s_mov_b32 s4, 0x100000 1142; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1143; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1144; GFX8-NEXT: ; return to shader part epilog 1145; 1146; GFX910-LABEL: s_buffer_load_imm_bit20: 1147; GFX910: ; %bb.0: 1148; GFX910-NEXT: s_mov_b32 s4, 0x100000 1149; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1150; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1151; GFX910-NEXT: ; return to shader part epilog 1152; 1153; GFX11-LABEL: 
s_buffer_load_imm_bit20: 1154; GFX11: ; %bb.0: 1155; GFX11-NEXT: s_mov_b32 s4, 0x100000 1156; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1157; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1158; GFX11-NEXT: ; return to shader part epilog 1159; 1160; GFX12-LABEL: s_buffer_load_imm_bit20: 1161; GFX12: ; %bb.0: 1162; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100000 1163; GFX12-NEXT: s_wait_kmcnt 0x0 1164; GFX12-NEXT: ; return to shader part epilog 1165 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) 1166 ret i32 %load 1167} 1168 1169define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) { 1170; GFX6-LABEL: s_buffer_load_imm_neg_bit20: 1171; GFX6: ; %bb.0: 1172; GFX6-NEXT: s_mov_b32 s4, 0xfff00000 1173; GFX6-NEXT: s_nop 3 1174; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1175; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1176; GFX6-NEXT: ; return to shader part epilog 1177; 1178; GFX7-LABEL: s_buffer_load_imm_neg_bit20: 1179; GFX7: ; %bb.0: 1180; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000 1181; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1182; GFX7-NEXT: ; return to shader part epilog 1183; 1184; GFX8-LABEL: s_buffer_load_imm_neg_bit20: 1185; GFX8: ; %bb.0: 1186; GFX8-NEXT: s_mov_b32 s4, 0xfff00000 1187; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1188; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1189; GFX8-NEXT: ; return to shader part epilog 1190; 1191; GFX910-LABEL: s_buffer_load_imm_neg_bit20: 1192; GFX910: ; %bb.0: 1193; GFX910-NEXT: s_mov_b32 s4, 0xfff00000 1194; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1195; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1196; GFX910-NEXT: ; return to shader part epilog 1197; 1198; GFX11-LABEL: s_buffer_load_imm_neg_bit20: 1199; GFX11: ; %bb.0: 1200; GFX11-NEXT: s_mov_b32 s4, 0xfff00000 1201; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1202; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1203; GFX11-NEXT: ; return to shader part epilog 1204; 1205; GFX12-LABEL: s_buffer_load_imm_neg_bit20: 
1206; GFX12: ; %bb.0: 1207; GFX12-NEXT: s_mov_b32 s4, 0xfff00000 1208; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1209; GFX12-NEXT: s_wait_kmcnt 0x0 1210; GFX12-NEXT: ; return to shader part epilog 1211 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) 1212 ret i32 %load 1213} 1214 1215define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) { 1216; GFX6-LABEL: s_buffer_load_imm_bit19: 1217; GFX6: ; %bb.0: 1218; GFX6-NEXT: s_mov_b32 s4, 0x80000 1219; GFX6-NEXT: s_nop 3 1220; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1221; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1222; GFX6-NEXT: ; return to shader part epilog 1223; 1224; GFX7-LABEL: s_buffer_load_imm_bit19: 1225; GFX7: ; %bb.0: 1226; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000 1227; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1228; GFX7-NEXT: ; return to shader part epilog 1229; 1230; GFX8910-LABEL: s_buffer_load_imm_bit19: 1231; GFX8910: ; %bb.0: 1232; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000 1233; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1234; GFX8910-NEXT: ; return to shader part epilog 1235; 1236; GFX11-LABEL: s_buffer_load_imm_bit19: 1237; GFX11: ; %bb.0: 1238; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000 1239; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1240; GFX11-NEXT: ; return to shader part epilog 1241; 1242; GFX12-LABEL: s_buffer_load_imm_bit19: 1243; GFX12: ; %bb.0: 1244; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000 1245; GFX12-NEXT: s_wait_kmcnt 0x0 1246; GFX12-NEXT: ; return to shader part epilog 1247 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) 1248 ret i32 %load 1249} 1250 1251define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) { 1252; GFX6-LABEL: s_buffer_load_imm_neg_bit19: 1253; GFX6: ; %bb.0: 1254; GFX6-NEXT: s_mov_b32 s4, 0xfff80000 1255; GFX6-NEXT: s_nop 3 1256; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1257; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1258; GFX6-NEXT: ; return to 
shader part epilog 1259; 1260; GFX7-LABEL: s_buffer_load_imm_neg_bit19: 1261; GFX7: ; %bb.0: 1262; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000 1263; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1264; GFX7-NEXT: ; return to shader part epilog 1265; 1266; GFX8-LABEL: s_buffer_load_imm_neg_bit19: 1267; GFX8: ; %bb.0: 1268; GFX8-NEXT: s_mov_b32 s4, 0xfff80000 1269; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4 1270; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1271; GFX8-NEXT: ; return to shader part epilog 1272; 1273; GFX910-LABEL: s_buffer_load_imm_neg_bit19: 1274; GFX910: ; %bb.0: 1275; GFX910-NEXT: s_mov_b32 s4, 0xfff80000 1276; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 1277; GFX910-NEXT: s_waitcnt lgkmcnt(0) 1278; GFX910-NEXT: ; return to shader part epilog 1279; 1280; GFX11-LABEL: s_buffer_load_imm_neg_bit19: 1281; GFX11: ; %bb.0: 1282; GFX11-NEXT: s_mov_b32 s4, 0xfff80000 1283; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1284; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1285; GFX11-NEXT: ; return to shader part epilog 1286; 1287; GFX12-LABEL: s_buffer_load_imm_neg_bit19: 1288; GFX12: ; %bb.0: 1289; GFX12-NEXT: s_mov_b32 s4, 0xfff80000 1290; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 1291; GFX12-NEXT: s_wait_kmcnt 0x0 1292; GFX12-NEXT: ; return to shader part epilog 1293 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) 1294 ret i32 %load 1295} 1296 1297define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) { 1298; GFX6-LABEL: s_buffer_load_imm_255: 1299; GFX6: ; %bb.0: 1300; GFX6-NEXT: s_movk_i32 s4, 0xff 1301; GFX6-NEXT: s_nop 3 1302; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1303; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1304; GFX6-NEXT: ; return to shader part epilog 1305; 1306; GFX7-LABEL: s_buffer_load_imm_255: 1307; GFX7: ; %bb.0: 1308; GFX7-NEXT: s_movk_i32 s4, 0xff 1309; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4 1310; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1311; GFX7-NEXT: ; return to shader part 
epilog 1312; 1313; GFX8910-LABEL: s_buffer_load_imm_255: 1314; GFX8910: ; %bb.0: 1315; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0xff 1316; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1317; GFX8910-NEXT: ; return to shader part epilog 1318; 1319; GFX11-LABEL: s_buffer_load_imm_255: 1320; GFX11: ; %bb.0: 1321; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff 1322; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1323; GFX11-NEXT: ; return to shader part epilog 1324; 1325; GFX12-LABEL: s_buffer_load_imm_255: 1326; GFX12: ; %bb.0: 1327; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff 1328; GFX12-NEXT: s_wait_kmcnt 0x0 1329; GFX12-NEXT: ; return to shader part epilog 1330 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0) 1331 ret i32 %load 1332} 1333 1334define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) { 1335; GFX67-LABEL: s_buffer_load_imm_256: 1336; GFX67: ; %bb.0: 1337; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40 1338; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1339; GFX67-NEXT: ; return to shader part epilog 1340; 1341; GFX8910-LABEL: s_buffer_load_imm_256: 1342; GFX8910: ; %bb.0: 1343; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x100 1344; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1345; GFX8910-NEXT: ; return to shader part epilog 1346; 1347; GFX11-LABEL: s_buffer_load_imm_256: 1348; GFX11: ; %bb.0: 1349; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100 1350; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1351; GFX11-NEXT: ; return to shader part epilog 1352; 1353; GFX12-LABEL: s_buffer_load_imm_256: 1354; GFX12: ; %bb.0: 1355; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100 1356; GFX12-NEXT: s_wait_kmcnt 0x0 1357; GFX12-NEXT: ; return to shader part epilog 1358 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0) 1359 ret i32 %load 1360} 1361 1362define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) { 1363; GFX67-LABEL: s_buffer_load_imm_1016: 1364; GFX67: ; %bb.0: 1365; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe 
1366; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1367; GFX67-NEXT: ; return to shader part epilog 1368; 1369; GFX8910-LABEL: s_buffer_load_imm_1016: 1370; GFX8910: ; %bb.0: 1371; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8 1372; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1373; GFX8910-NEXT: ; return to shader part epilog 1374; 1375; GFX11-LABEL: s_buffer_load_imm_1016: 1376; GFX11: ; %bb.0: 1377; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8 1378; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1379; GFX11-NEXT: ; return to shader part epilog 1380; 1381; GFX12-LABEL: s_buffer_load_imm_1016: 1382; GFX12: ; %bb.0: 1383; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8 1384; GFX12-NEXT: s_wait_kmcnt 0x0 1385; GFX12-NEXT: ; return to shader part epilog 1386 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0) 1387 ret i32 %load 1388} 1389 1390define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) { 1391; GFX67-LABEL: s_buffer_load_imm_1020: 1392; GFX67: ; %bb.0: 1393; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff 1394; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1395; GFX67-NEXT: ; return to shader part epilog 1396; 1397; GFX8910-LABEL: s_buffer_load_imm_1020: 1398; GFX8910: ; %bb.0: 1399; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc 1400; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1401; GFX8910-NEXT: ; return to shader part epilog 1402; 1403; GFX11-LABEL: s_buffer_load_imm_1020: 1404; GFX11: ; %bb.0: 1405; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc 1406; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1407; GFX11-NEXT: ; return to shader part epilog 1408; 1409; GFX12-LABEL: s_buffer_load_imm_1020: 1410; GFX12: ; %bb.0: 1411; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc 1412; GFX12-NEXT: s_wait_kmcnt 0x0 1413; GFX12-NEXT: ; return to shader part epilog 1414 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0) 1415 ret i32 %load 1416} 1417 1418define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) { 1419; GFX6-LABEL: 
s_buffer_load_imm_1021: 1420; GFX6: ; %bb.0: 1421; GFX6-NEXT: s_movk_i32 s4, 0x3fd 1422; GFX6-NEXT: s_nop 3 1423; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1424; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1425; GFX6-NEXT: ; return to shader part epilog 1426; 1427; GFX7-LABEL: s_buffer_load_imm_1021: 1428; GFX7: ; %bb.0: 1429; GFX7-NEXT: s_movk_i32 s4, 0x3fd 1430; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4 1431; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1432; GFX7-NEXT: ; return to shader part epilog 1433; 1434; GFX8910-LABEL: s_buffer_load_imm_1021: 1435; GFX8910: ; %bb.0: 1436; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd 1437; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1438; GFX8910-NEXT: ; return to shader part epilog 1439; 1440; GFX11-LABEL: s_buffer_load_imm_1021: 1441; GFX11: ; %bb.0: 1442; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd 1443; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1444; GFX11-NEXT: ; return to shader part epilog 1445; 1446; GFX12-LABEL: s_buffer_load_imm_1021: 1447; GFX12: ; %bb.0: 1448; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd 1449; GFX12-NEXT: s_wait_kmcnt 0x0 1450; GFX12-NEXT: ; return to shader part epilog 1451 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0) 1452 ret i32 %load 1453} 1454 1455define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) { 1456; GFX6-LABEL: s_buffer_load_imm_1024: 1457; GFX6: ; %bb.0: 1458; GFX6-NEXT: s_movk_i32 s4, 0x400 1459; GFX6-NEXT: s_nop 3 1460; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1461; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1462; GFX6-NEXT: ; return to shader part epilog 1463; 1464; GFX7-LABEL: s_buffer_load_imm_1024: 1465; GFX7: ; %bb.0: 1466; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100 1467; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1468; GFX7-NEXT: ; return to shader part epilog 1469; 1470; GFX8910-LABEL: s_buffer_load_imm_1024: 1471; GFX8910: ; %bb.0: 1472; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x400 1473; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1474; GFX8910-NEXT: ; 
return to shader part epilog 1475; 1476; GFX11-LABEL: s_buffer_load_imm_1024: 1477; GFX11: ; %bb.0: 1478; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400 1479; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1480; GFX11-NEXT: ; return to shader part epilog 1481; 1482; GFX12-LABEL: s_buffer_load_imm_1024: 1483; GFX12: ; %bb.0: 1484; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400 1485; GFX12-NEXT: s_wait_kmcnt 0x0 1486; GFX12-NEXT: ; return to shader part epilog 1487 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0) 1488 ret i32 %load 1489} 1490 1491define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) { 1492; GFX6-LABEL: s_buffer_load_imm_1025: 1493; GFX6: ; %bb.0: 1494; GFX6-NEXT: s_movk_i32 s4, 0x401 1495; GFX6-NEXT: s_nop 3 1496; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4 1497; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1498; GFX6-NEXT: ; return to shader part epilog 1499; 1500; GFX7-LABEL: s_buffer_load_imm_1025: 1501; GFX7: ; %bb.0: 1502; GFX7-NEXT: s_movk_i32 s4, 0x401 1503; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4 1504; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1505; GFX7-NEXT: ; return to shader part epilog 1506; 1507; GFX8910-LABEL: s_buffer_load_imm_1025: 1508; GFX8910: ; %bb.0: 1509; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x401 1510; GFX8910-NEXT: s_waitcnt lgkmcnt(0) 1511; GFX8910-NEXT: ; return to shader part epilog 1512; 1513; GFX11-LABEL: s_buffer_load_imm_1025: 1514; GFX11: ; %bb.0: 1515; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401 1516; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1517; GFX11-NEXT: ; return to shader part epilog 1518; 1519; GFX12-LABEL: s_buffer_load_imm_1025: 1520; GFX12: ; %bb.0: 1521; GFX12-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401 1522; GFX12-NEXT: s_wait_kmcnt 0x0 1523; GFX12-NEXT: ; return to shader part epilog 1524 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0) 1525 ret i32 %load 1526} 1527 1528define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) { 1529; 
GFX6-LABEL: s_buffer_load_imm_1028:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_movk_i32 s4, 0x404
; GFX6-NEXT:    s_nop 3
; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_1028:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x101
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1028:
; GFX8910:       ; %bb.0:
; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x404
; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8910-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1028:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x404
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: s_buffer_load_imm_1028:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x404
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  ; Constant byte offset 1028 (0x404): a multiple of 4 that lies one dword
  ; past the 1024 case. Following the neighbouring 1024/1025 tests, the tahiti
  ; run is expected to move the offset into an SGPR, the hawaii run to fold it
  ; into a dword-scaled literal (1028 / 4 = 0x101), and the remaining runs to
  ; encode the byte offset directly.
  ; NOTE(review): this test previously passed 1024 and so duplicated the 1024
  ; test; the assertions above were adjusted by hand for 1028 - regenerate
  ; them with utils/update_llc_test_checks.py to confirm.
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
  ret i32 %load
}

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX6789: {{.*}}
; GFX789: {{.*}}
; GFX89: {{.*}}