1; RUN: llc -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s 2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s 3; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s 4; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -filetype=obj -amdgpu-use-divergent-register-indexing < %s | llvm-readobj -r - | FileCheck --check-prefix=RELS %s 5; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,MUBUF,GFX10_W32-MUBUF,GFX9_10-MUBUF %s 6; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global,+wavefrontsize64 -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,MUBUF,GFX10_W64-MUBUF,GFX9_10-MUBUF %s 7; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,FLATSCR,GFX9-FLATSCR %s 8; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1030 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,FLATSCR,GFX10-FLATSCR %s 9; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,FLATSCR,GFX9-FLATSCR-PAL %s 10; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | 
FileCheck --check-prefixes=GCN,GFX9PLUS,FLATSCR,GFX10-FLATSCR-PAL %s 11; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,GFX11-FLATSCR %s 12; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-flat-for-global,+enable-flat-scratch -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,GFX11-FLATSCR %s 13 14; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 15; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 16 17; This used to fail due to a v_add_i32 instruction with an illegal immediate 18; operand that was created during Local Stack Slot Allocation. Test case derived 19; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 20; 21; GCN-LABEL: {{^}}ps_main: 22 23; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2 24; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0 25; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 26 27; GFX10-FLATSCR: s_add_u32 s0, s0, s2 28; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 29; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 30; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 31 32; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 33; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 34; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 35; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 36; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 37; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 38; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 39; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 40; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 41; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 42; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 43 44; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 45; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 46; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 47; 
GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 48; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 49; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 50; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 51; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 52; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 53 54; SIVI-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 55; SIVI-DAG: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 56; SIVI-DAG: s_mov_b32 s6, -1 57 58; GFX9-MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 59; GFX9-MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 60; GFX9-MUBUF-DAG: s_mov_b32 s2, -1 61 62; SI-DAG: s_mov_b32 s7, 0xe8f000 63; VI-DAG: s_mov_b32 s7, 0xe80000 64; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000 65; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000 66; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000 67 68; FLATSCR-NOT: SCRATCH_RSRC_DWORD 69 70; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 71; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: 72 73; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset: 74 75; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 76; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] 77; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 78; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 79; GFX11-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 80 81; MUBUF-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x200, [[CLAMP_IDX]] 82; FLATSCR: v_mov_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]] 83 84; MUBUF: buffer_load_dword {{v[0-9]+}}, [[CLAMP_IDX]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen 85; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen 86; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off 87; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, [[CLAMP_IDX]], off{{$}} 88define amdgpu_ps float @ps_main(i32 %idx) { 89 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float 
undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 90 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 
undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 91 %r = fadd float %v1, %v2 92 ret float %r 93} 94 95; GCN-LABEL: {{^}}vs_main: 96; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 97; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 98 99; GFX10-FLATSCR: s_add_u32 s0, s0, s2 100; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 101; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 102; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 103 104; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 105; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 106; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 107; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 108; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 109; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 110; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 111; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 112; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 113; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 114 115; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 116; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 117; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 
118; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 119; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 120; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 121; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 122; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 123; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 124 125; MUBUF-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 126 127; FLATSCR-NOT: SCRATCH_RSRC_DWORD 128 129; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 130; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 131 132; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 133; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: 134 135; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 136; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 137 138; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 139; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 140 141define amdgpu_vs float @vs_main(i32 %idx) { 142 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 
0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 143 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 
0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 144 %r = fadd float %v1, %v2 145 ret float %r 146} 147 148; GCN-LABEL: {{^}}cs_main: 149; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 150; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 151 152; GFX10-FLATSCR: s_add_u32 s0, s0, s2 153; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 154; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 155; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 156 157; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 158; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 159; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10 160; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 161; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 162; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 163; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 164; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 165; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 166; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 167 168; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 169; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 170; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10 171; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 172; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 173; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 174; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 175; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 176; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 177 178; MUBUF-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 179 180; FLATSCR-NOT: SCRATCH_RSRC_DWORD 181 182; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 183; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 184 185; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 186; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 187 188; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 189; 
GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 190 191define amdgpu_cs float @cs_main(i32 %idx) { 192 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 193 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, 
float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 194 %r = fadd float %v1, %v2 195 ret float %r 196} 197 198; GCN-LABEL: {{^}}hs_main: 199; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 200; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 201 202; GFX10-FLATSCR: s_add_u32 s0, s0, s5 203; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 204; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 205; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 206 207; SIVI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 208; SIVI-NOT: s_mov_b32 s4 209; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 210; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 211 212; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 213; GFX9PLUS-NOT: s_mov_b32 s5 214; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 215; 
GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 216 217; FLATSCR-NOT: SCRATCH_RSRC_DWORD 218; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 219; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 220 221; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 222; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 223define amdgpu_hs float @hs_main(i32 %idx) { 224 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 225 %v2 = extractelement <81 x float> 
<float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 226 %r = fadd float %v1, %v2 227 ret float %r 228} 229 230; GCN-LABEL: {{^}}gs_main: 231; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 232; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 233 234; GFX10-FLATSCR: s_add_u32 s0, s0, s5 235; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 236; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 237; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 238 239; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] 240; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 241; 
GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 242; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 243; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 244; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 245; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 246; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff 247; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 248; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 249 250; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] 251; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 252; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 253; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 254; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff 255; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 256; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 257; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 258; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 259 260; SIVI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 261; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 262; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 263 264; GFX9_10-MUBUF: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 265; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 266; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 267 268; FLATSCR-NOT: SCRATCH_RSRC_DWORD 269; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 270; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 271 272; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 273; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off 274define amdgpu_gs float @gs_main(i32 %idx) { 275 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 
undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 276 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 
0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 277 %r = fadd float %v1, %v2 278 ret float %r 279} 280 281; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at 282; SGPR5, and the inreg implementation is used to reference it in the IR. The 283; following tests confirm the shader and anything inserted after the return 284; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset. 
; Hull shader (amdgpu_hs) variant of the scratch offset test.
; The function receives six inreg i32 arguments, of which only the sixth
; (%swo) is named and used, plus a runtime index %idx. It extracts element
; %idx from each of two constant <81 x float> tables, adds the two values and
; returns %swo in struct field 2 and the sum in struct field 3.
; The directives below pin the scratch setup sequence expected for each
; subtarget configuration, followed by two scratch loads (one per lookup).
; Presumably the variable index is what forces the tables into scratch
; memory (confirm against llc output before relying on this).
; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0

; GFX10-FLATSCR: s_add_u32 s0, s0, s5
; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1

; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0

; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1

; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; FLATSCR-NOT: SCRATCH_RSRC_DWORD

; SIVI-NOT: s_mov_b32 s6
; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen

; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen

; MUBUF-DAG: s_mov_b32 s2, s5

; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off

; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
  ; First table lookup at the runtime index.
  %elt.a = extractelement <81 x float> <
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000,
      float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000,
      float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000,
      float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000,
      float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000,
      float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000,
      float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000,
      float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000
    >, i32 %idx
  ; Second table lookup at the same index.
  %elt.b = extractelement <81 x float> <
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000,
      float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000,
      float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000,
      float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000,
      float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000,
      float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000,
      float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000,
      float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000
    >, i32 %idx
  %sum = fadd float %elt.a, %elt.b
  ; Pass %swo through in field 2 and return the sum in field 3.
  %ret.swo = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
  %ret.full = insertvalue <{i32, i32, i32, float}> %ret.swo, float %sum, 3
  ret <{i32, i32, i32, float}> %ret.full
}

; Geometry shader (amdgpu_gs) variant of the scratch offset test.
; Same signature shape and body as the hull shader case above it in the
; file, with only the calling convention and symbol name changed. It looks
; up %idx in the same two constant <81 x float> tables, adds the results and
; returns %swo in struct field 2 and the sum in struct field 3.
; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0

; GFX10-FLATSCR: s_add_u32 s0, s0, s5
; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1

; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0

; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1

; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; FLATSCR-NOT: SCRATCH_RSRC_DWORD

; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen

; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen

; MUBUF-DAG: s_mov_b32 s2, s5

; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off

; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
  ; First table lookup at the runtime index.
  %elt.a = extractelement <81 x float> <
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000,
      float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000,
      float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000,
      float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000,
      float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000,
      float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000,
      float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000,
      float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000
    >, i32 %idx
  ; Second table lookup at the same index.
  %elt.b = extractelement <81 x float> <
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float undef, float undef, float undef, float undef, float undef, float undef, float undef,
      float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000,
      float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000,
      float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000,
      float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000,
      float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000,
      float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000,
      float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000,
      float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000
    >, i32 %idx
  %sum = fadd float %elt.a, %elt.b
  ; Pass %swo through in field 2 and return the sum in field 3.
  %ret.swo = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
  %ret.full = insertvalue <{i32, i32, i32, float}> %ret.swo, float %sum, 3
  ret <{i32, i32, i32, float}> %ret.full
}