1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s 3 4; Natural mapping 5define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 6 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 7 ; GFX908: bb.0 (%ir-block.0): 8 ; GFX908-NEXT: successors: %bb.1(0x80000000) 9 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 10 ; GFX908-NEXT: {{ $}} 11 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 12 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 13 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 14 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 16 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 17 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 18 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 19 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 20 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 21 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 22 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 23 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 24 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 25 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 26 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 27 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 28 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 29 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 30 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 31 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 32 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 33 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 34 ; GFX908-NEXT: {{ $}} 35 ; GFX908-NEXT: bb.1: 36 ; GFX908-NEXT: successors: %bb.2(0x80000000) 37 ; GFX908-NEXT: {{ $}} 38 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 39 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 40 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 41 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 42 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 43 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 44 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 45 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 46 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 47 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 48 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 49 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 50 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 51 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 52 ; GFX908-NEXT: {{ $}} 53 ; GFX908-NEXT: bb.2: 54 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 55 ; GFX908-NEXT: {{ $}} 56 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 57 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 58 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 59 ; GFX908-NEXT: {{ $}} 60 ; GFX908-NEXT: bb.3: 61 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 62 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 63 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 64 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 65 ret float %val 66} 67 68; Copies for VGPR arguments 69define float @raw_ptr_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 70 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset 71 ; GFX908: bb.0 (%ir-block.0): 72 ; GFX908-NEXT: successors: %bb.1(0x80000000) 73 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 74 ; GFX908-NEXT: {{ $}} 75 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 76 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 77 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 78 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 79 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 80 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 81 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 82 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 83 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 84 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 85 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 86 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 87 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 88 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 89 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 90 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 91 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 92 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 93 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 94 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 95 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 96 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 97 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 98 ; GFX908-NEXT: {{ $}} 99 ; GFX908-NEXT: bb.1: 100 ; GFX908-NEXT: successors: %bb.2(0x80000000) 101 ; GFX908-NEXT: {{ $}} 102 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 103 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 104 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 105 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 106 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 107 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 108 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 109 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 110 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 111 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 112 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 113 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 114 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 115 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 116 ; GFX908-NEXT: {{ $}} 117 ; GFX908-NEXT: bb.2: 118 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 119 ; GFX908-NEXT: {{ $}} 120 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 121 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 122 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 123 ; GFX908-NEXT: {{ $}} 124 ; GFX908-NEXT: bb.3: 125 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 126 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 127 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 128 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 129 ret float %val 130} 131 132; Waterfall for rsrc 133define float @raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 134 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset 135 ; GFX908: bb.0 (%ir-block.0): 136 ; GFX908-NEXT: successors: %bb.1(0x80000000) 137 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 138 ; GFX908-NEXT: {{ $}} 139 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 140 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 141 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 142 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 143 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 144 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 145 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 146 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 147 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 148 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 149 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 150 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 151 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 152 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 153 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 154 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 155 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 156 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 157 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 158 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 159 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 160 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 161 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 162 ; GFX908-NEXT: {{ $}} 163 ; GFX908-NEXT: bb.1: 164 ; GFX908-NEXT: successors: %bb.2(0x80000000) 165 ; GFX908-NEXT: {{ $}} 166 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 167 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 168 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 169 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 170 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 171 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 172 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 173 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 174 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 175 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 176 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 177 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 178 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 179 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 180 ; GFX908-NEXT: {{ $}} 181 ; GFX908-NEXT: bb.2: 182 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 183 ; GFX908-NEXT: {{ $}} 184 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 185 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 186 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 187 ; GFX908-NEXT: {{ $}} 188 ; GFX908-NEXT: bb.3: 189 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 190 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 191 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 192 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 193 ret float %val 194} 195 196; Waterfall for rsrc and soffset 197define float @raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 198 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset 199 ; GFX908: bb.0 (%ir-block.0): 200 ; GFX908-NEXT: successors: %bb.1(0x80000000) 201 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 202 ; GFX908-NEXT: {{ $}} 203 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 204 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 205 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 206 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 207 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 208 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 209 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 210 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 211 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 212 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 213 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 214 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 215 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 216 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 217 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 218 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 219 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 220 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 221 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 222 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 223 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 224 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 225 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 226 ; GFX908-NEXT: {{ $}} 227 ; GFX908-NEXT: bb.1: 228 ; GFX908-NEXT: successors: %bb.2(0x80000000) 229 ; GFX908-NEXT: {{ $}} 230 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 231 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 232 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 233 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 234 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 235 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 236 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 237 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 238 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 239 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 240 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 241 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 242 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 243 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 244 ; GFX908-NEXT: {{ $}} 245 ; GFX908-NEXT: bb.2: 246 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 247 ; GFX908-NEXT: {{ $}} 248 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 249 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 250 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 251 ; GFX908-NEXT: {{ $}} 252 ; GFX908-NEXT: bb.3: 253 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 254 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 255 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 256 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 257 ret float %val 258} 259 260; Natural mapping + glc 261define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 262 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 263 ; GFX908: bb.0 (%ir-block.0): 264 ; GFX908-NEXT: successors: %bb.1(0x80000000) 265 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 266 ; GFX908-NEXT: {{ $}} 267 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 268 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 269 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 270 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 271 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 272 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 273 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 274 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 275 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 276 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 277 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 278 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 279 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 280 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 281 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 282 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 283 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 284 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 285 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 286 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 287 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 288 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 289 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 290 ; GFX908-NEXT: {{ $}} 291 ; GFX908-NEXT: bb.1: 292 ; GFX908-NEXT: successors: %bb.2(0x80000000) 293 ; GFX908-NEXT: {{ $}} 294 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 295 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 296 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 297 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 298 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 299 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 300 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 301 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 302 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 303 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 304 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 305 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 306 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 307 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 308 ; GFX908-NEXT: {{ $}} 309 ; GFX908-NEXT: bb.2: 310 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 311 ; GFX908-NEXT: {{ $}} 312 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 313 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 314 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 315 ; GFX908-NEXT: {{ $}} 316 ; GFX908-NEXT: bb.3: 317 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 318 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 319 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 320 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 1) 321 ret float %val 322} 323 324; Natural mapping + slc 325define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 326 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 327 ; GFX908: bb.0 (%ir-block.0): 328 ; GFX908-NEXT: successors: %bb.1(0x80000000) 329 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 330 ; GFX908-NEXT: {{ $}} 331 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 332 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 333 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 334 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 335 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 336 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 337 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 338 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 339 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 340 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 341 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 342 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 343 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 344 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 345 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 346 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 347 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 348 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 349 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 350 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 351 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 352 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 353 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 354 ; GFX908-NEXT: {{ $}} 355 ; GFX908-NEXT: bb.1: 356 ; GFX908-NEXT: successors: %bb.2(0x80000000) 357 ; GFX908-NEXT: {{ $}} 358 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 359 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 360 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 361 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 362 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 363 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 364 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 365 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 366 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 367 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 368 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 369 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 370 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 371 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 372 ; GFX908-NEXT: {{ $}} 373 ; GFX908-NEXT: bb.2: 374 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 375 ; GFX908-NEXT: {{ $}} 376 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 377 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 378 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 379 ; GFX908-NEXT: {{ $}} 380 ; GFX908-NEXT: bb.3: 381 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 382 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 383 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 384 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2) 385 ret float %val 386} 387 388; Natural mapping + dlc 389define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 390 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 391 ; GFX908: bb.0 (%ir-block.0): 392 ; GFX908-NEXT: successors: %bb.1(0x80000000) 393 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 394 ; GFX908-NEXT: {{ $}} 395 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 396 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 397 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 398 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 399 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 400 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 401 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 402 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 403 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 404 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 405 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 406 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 407 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 408 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 409 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 410 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 411 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 412 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 413 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 414 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 415 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 416 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 417 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 418 ; GFX908-NEXT: {{ $}} 419 ; GFX908-NEXT: bb.1: 420 ; GFX908-NEXT: successors: %bb.2(0x80000000) 421 ; GFX908-NEXT: {{ $}} 422 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 423 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 424 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 425 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 426 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 427 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 428 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 429 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 430 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 431 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 432 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 433 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 434 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 435 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 436 ; GFX908-NEXT: {{ $}} 437 ; GFX908-NEXT: bb.2: 438 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 439 ; GFX908-NEXT: {{ $}} 440 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 441 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 442 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 443 ; GFX908-NEXT: {{ $}} 444 ; GFX908-NEXT: bb.3: 445 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 446 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 447 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 448 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 4) 449 ret float %val 450} 451 452; Natural mapping + slc + dlc 453define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 454 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc 455 ; GFX908: bb.0 (%ir-block.0): 456 ; GFX908-NEXT: successors: %bb.1(0x80000000) 457 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 458 ; GFX908-NEXT: {{ $}} 459 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 460 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 461 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 462 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 463 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 464 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 465 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 466 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 467 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 468 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 469 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 470 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 471 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 472 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 473 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 474 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 475 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 476 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 477 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 478 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 479 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 480 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 481 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 482 ; GFX908-NEXT: {{ $}} 483 ; GFX908-NEXT: bb.1: 484 ; GFX908-NEXT: successors: %bb.2(0x80000000) 485 ; GFX908-NEXT: {{ $}} 486 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 487 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 488 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 489 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 490 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 491 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 492 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 493 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 494 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 495 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 496 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 497 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 498 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 499 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 500 ; GFX908-NEXT: {{ $}} 501 ; GFX908-NEXT: bb.2: 502 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 503 ; GFX908-NEXT: {{ $}} 504 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 505 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 506 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 507 ; GFX908-NEXT: {{ $}} 508 ; GFX908-NEXT: bb.3: 509 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 510 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 511 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 512 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 6) 513 ret float %val 514} 515 516; Natural mapping + glc + dlc 517define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 518 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc 519 ; GFX908: bb.0 (%ir-block.0): 520 ; GFX908-NEXT: successors: %bb.1(0x80000000) 521 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 522 ; GFX908-NEXT: {{ $}} 523 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 524 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 525 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 526 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 527 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 528 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 529 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 530 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 531 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 532 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 533 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 534 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 535 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 536 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 537 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 538 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 539 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 540 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 541 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 542 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 543 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 544 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 545 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 546 ; GFX908-NEXT: {{ $}} 547 ; GFX908-NEXT: bb.1: 548 ; GFX908-NEXT: successors: %bb.2(0x80000000) 549 ; GFX908-NEXT: {{ $}} 550 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 551 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 552 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 553 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 554 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 555 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 556 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 557 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 558 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 559 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 560 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 561 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 562 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 563 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 564 ; GFX908-NEXT: {{ $}} 565 ; GFX908-NEXT: bb.2: 566 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 567 ; GFX908-NEXT: {{ $}} 568 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 569 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 570 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 571 ; GFX908-NEXT: {{ $}} 572 ; GFX908-NEXT: bb.3: 573 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 574 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 575 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 576 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 5) 577 ret float %val 578} 579 580; Natural mapping + glc + slc + dlc 581define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 582 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc 583 ; GFX908: bb.0 (%ir-block.0): 584 ; GFX908-NEXT: successors: %bb.1(0x80000000) 585 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 586 ; GFX908-NEXT: {{ $}} 587 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 588 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 589 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 590 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 591 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 592 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 593 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 594 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 595 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 596 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 597 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 598 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 599 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 600 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 601 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 602 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 603 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 604 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 605 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 606 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 607 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 608 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 609 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 610 ; GFX908-NEXT: {{ $}} 611 ; GFX908-NEXT: bb.1: 612 ; GFX908-NEXT: successors: %bb.2(0x80000000) 613 ; GFX908-NEXT: {{ $}} 614 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 615 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 616 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 617 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 618 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 619 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 620 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 621 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 622 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 623 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 624 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 625 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 626 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 627 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 628 ; GFX908-NEXT: {{ $}} 629 ; GFX908-NEXT: bb.2: 630 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 631 ; GFX908-NEXT: {{ $}} 632 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 633 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 634 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 635 ; GFX908-NEXT: {{ $}} 636 ; GFX908-NEXT: bb.3: 637 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 638 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 639 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 640 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 7) 641 ret float %val 642} 643 644; Natural mapping 645define <2 x float> @raw_ptr_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 646 ; GFX908-LABEL: name: raw_ptr_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 647 ; GFX908: bb.0 (%ir-block.0): 648 ; GFX908-NEXT: successors: %bb.1(0x80000000) 649 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 650 ; GFX908-NEXT: {{ $}} 651 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 652 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 653 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 654 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 655 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 656 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 657 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 658 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 659 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 660 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 661 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 662 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 663 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 664 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 665 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 666 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 667 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 668 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 669 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 670 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 671 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 672 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 673 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 674 ; GFX908-NEXT: {{ $}} 675 ; GFX908-NEXT: bb.1: 676 ; GFX908-NEXT: successors: %bb.2(0x80000000) 677 ; GFX908-NEXT: {{ $}} 678 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 679 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 680 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 681 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 682 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 683 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 684 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 685 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 686 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 687 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 688 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 689 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 690 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 691 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 692 ; GFX908-NEXT: {{ $}} 693 ; GFX908-NEXT: bb.2: 694 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 695 ; GFX908-NEXT: {{ $}} 696 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.rsrc, align 1, addrspace 8) 697 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 698 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 699 ; GFX908-NEXT: {{ $}} 700 ; GFX908-NEXT: bb.3: 701 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 702 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 703 ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 704 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 705 ; GFX908-NEXT: $vgpr1 = COPY [[COPY11]] 706 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 707 %val = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 708 ret <2 x float> %val 709} 710 711define <3 x float> @raw_ptr_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 712 ; GFX908-LABEL: name: raw_ptr_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 713 ; GFX908: bb.0 (%ir-block.0): 714 ; GFX908-NEXT: successors: %bb.1(0x80000000) 715 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 716 ; GFX908-NEXT: {{ $}} 717 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 718 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 719 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 720 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 721 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 722 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 723 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 724 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 725 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 726 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 727 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 728 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 729 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 730 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 731 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 732 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 733 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 734 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 735 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 736 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 737 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 738 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 739 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 740 ; GFX908-NEXT: {{ $}} 741 ; GFX908-NEXT: bb.1: 742 ; GFX908-NEXT: successors: %bb.2(0x80000000) 743 ; GFX908-NEXT: {{ $}} 744 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 745 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 746 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 747 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 748 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 749 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 750 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 751 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 752 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 753 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 754 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 755 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 756 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 757 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 758 ; GFX908-NEXT: {{ $}} 759 ; GFX908-NEXT: bb.2: 760 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 761 ; GFX908-NEXT: {{ $}} 762 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from %ir.rsrc, align 1, addrspace 8) 763 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 764 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 765 ; GFX908-NEXT: {{ $}} 766 ; GFX908-NEXT: bb.3: 767 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 768 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 769 ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 770 ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 771 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 772 ; GFX908-NEXT: $vgpr1 = COPY [[COPY11]] 773 ; GFX908-NEXT: $vgpr2 = COPY [[COPY12]] 774 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 775 %val = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 776 ret <3 x float> %val 777} 778 779define <4 x float> @raw_ptr_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 780 ; GFX908-LABEL: name: raw_ptr_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 781 ; GFX908: bb.0 (%ir-block.0): 782 ; GFX908-NEXT: successors: %bb.1(0x80000000) 783 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 784 ; GFX908-NEXT: {{ $}} 785 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 786 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 787 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 788 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 789 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 790 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 791 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 792 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 793 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 794 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 795 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 796 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 797 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 798 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 799 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 800 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 801 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 802 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 803 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 804 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 805 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 806 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 807 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 808 ; GFX908-NEXT: {{ $}} 809 ; GFX908-NEXT: bb.1: 810 ; GFX908-NEXT: successors: %bb.2(0x80000000) 811 ; GFX908-NEXT: {{ $}} 812 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 813 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 814 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 815 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 816 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 817 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 818 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 819 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 820 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 821 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 822 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 823 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 824 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 825 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 826 ; GFX908-NEXT: {{ $}} 827 ; GFX908-NEXT: bb.2: 828 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 829 ; GFX908-NEXT: {{ $}} 830 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.rsrc, align 1, addrspace 8) 831 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 832 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 833 ; GFX908-NEXT: {{ $}} 834 ; GFX908-NEXT: bb.3: 835 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 836 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 837 ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 838 ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 839 ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 840 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 841 ; GFX908-NEXT: $vgpr1 = COPY [[COPY11]] 842 ; GFX908-NEXT: $vgpr2 = COPY [[COPY12]] 843 ; GFX908-NEXT: $vgpr3 = COPY [[COPY13]] 844 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 845 %val = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 846 ret <4 x float> %val 847} 848 849define half @raw_ptr_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 850 ; GFX908-LABEL: name: raw_ptr_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 851 ; GFX908: bb.0 (%ir-block.0): 852 ; GFX908-NEXT: successors: %bb.1(0x80000000) 853 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 854 ; GFX908-NEXT: {{ $}} 855 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 856 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 857 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 858 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 859 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 860 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 861 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 862 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 863 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 864 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 865 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 866 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 867 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 868 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 869 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 870 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 871 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 872 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 873 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 874 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 875 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 876 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 877 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 878 ; GFX908-NEXT: {{ $}} 879 ; GFX908-NEXT: bb.1: 880 ; GFX908-NEXT: successors: %bb.2(0x80000000) 881 ; GFX908-NEXT: {{ $}} 882 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 883 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 884 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 885 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 886 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 887 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 888 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 889 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 890 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 891 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 892 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 893 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 894 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 895 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 896 ; GFX908-NEXT: {{ $}} 897 ; GFX908-NEXT: bb.2: 898 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 899 ; GFX908-NEXT: {{ $}} 900 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) 901 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 902 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 903 ; GFX908-NEXT: {{ $}} 904 ; GFX908-NEXT: bb.3: 905 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 906 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_USHORT_OFFEN]] 907 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 908 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 909 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 910 ret half %val 911} 912 913define <2 x half> @raw_ptr_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 914 ; GFX908-LABEL: name: raw_ptr_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 915 ; GFX908: bb.0 (%ir-block.0): 916 ; GFX908-NEXT: successors: %bb.1(0x80000000) 917 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 918 ; GFX908-NEXT: {{ $}} 919 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 920 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 921 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 922 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 923 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 924 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 925 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 926 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 927 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 928 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 929 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 930 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 931 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 932 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 933 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 934 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 935 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 936 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 937 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 938 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 939 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 940 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 941 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 942 ; GFX908-NEXT: {{ $}} 943 ; GFX908-NEXT: bb.1: 944 ; GFX908-NEXT: successors: %bb.2(0x80000000) 945 ; GFX908-NEXT: {{ $}} 946 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 947 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 948 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 949 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 950 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 951 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 952 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 953 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 954 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 955 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 956 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 957 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 958 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 959 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 960 ; GFX908-NEXT: {{ $}} 961 ; GFX908-NEXT: bb.2: 962 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 963 ; GFX908-NEXT: {{ $}} 964 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 965 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 966 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 967 ; GFX908-NEXT: {{ $}} 968 ; GFX908-NEXT: bb.3: 969 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 970 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 971 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 972 %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 973 ret <2 x half> %val 974} 975 976define <4 x half> @raw_ptr_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 977 ; GFX908-LABEL: name: raw_ptr_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 978 ; GFX908: bb.0 (%ir-block.0): 979 ; GFX908-NEXT: successors: %bb.1(0x80000000) 980 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 981 ; GFX908-NEXT: {{ $}} 982 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 983 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 984 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 985 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 986 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 987 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 988 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 989 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 990 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 991 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 992 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 993 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 994 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 995 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 996 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 997 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 998 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 999 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1000 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1001 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1002 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1003 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1004 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1005 ; GFX908-NEXT: {{ $}} 1006 ; GFX908-NEXT: bb.1: 1007 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1008 ; GFX908-NEXT: {{ $}} 1009 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1010 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1011 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1012 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1013 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1014 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1015 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1016 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1017 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1018 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1019 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1020 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1021 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1022 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1023 ; GFX908-NEXT: {{ $}} 1024 ; GFX908-NEXT: bb.2: 1025 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1026 ; GFX908-NEXT: {{ $}} 1027 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.rsrc, align 1, addrspace 8) 1028 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1029 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1030 ; GFX908-NEXT: {{ $}} 1031 ; GFX908-NEXT: bb.3: 1032 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1033 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 1034 ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 1035 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 1036 ; GFX908-NEXT: $vgpr1 = COPY [[COPY11]] 1037 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 1038 %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1039 ret <4 x half> %val 1040} 1041 1042define float @raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1043 ; GFX908-LABEL: name: raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext 1044 ; GFX908: bb.0 (%ir-block.0): 1045 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1046 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1047 ; GFX908-NEXT: {{ $}} 1048 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1049 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1050 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1051 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1052 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1053 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1054 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1055 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1056 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1057 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1058 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1059 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1060 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1061 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1062 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1063 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1064 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1065 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1066 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1067 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1068 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1069 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1070 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1071 ; GFX908-NEXT: {{ $}} 1072 ; GFX908-NEXT: bb.1: 1073 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1074 ; GFX908-NEXT: {{ $}} 1075 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1076 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1077 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1078 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1079 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1080 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1081 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1082 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1083 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1084 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1085 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1086 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1087 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1088 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1089 ; GFX908-NEXT: {{ $}} 1090 ; GFX908-NEXT: bb.2: 1091 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1092 ; GFX908-NEXT: {{ $}} 1093 ; GFX908-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) 1094 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1095 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1096 ; GFX908-NEXT: {{ $}} 1097 ; GFX908-NEXT: bb.3: 1098 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1099 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] 1100 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1101 %val = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1102 %zext = zext i8 %val to i32 1103 %cast = bitcast i32 %zext to float 1104 ret float %cast 1105} 1106 1107define float @raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1108 ; GFX908-LABEL: name: raw_ptr_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext 1109 ; GFX908: bb.0 (%ir-block.0): 1110 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1111 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1112 ; GFX908-NEXT: {{ $}} 1113 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1114 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1115 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1116 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1117 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1118 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1119 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1120 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1121 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1122 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1123 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1124 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1125 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1126 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1127 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1128 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1129 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1130 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1131 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1132 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1133 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1134 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1135 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1136 ; GFX908-NEXT: {{ $}} 1137 ; GFX908-NEXT: bb.1: 1138 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1139 ; GFX908-NEXT: {{ $}} 1140 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1141 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1142 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1143 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1144 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1145 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1146 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1147 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1148 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1149 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1150 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1151 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1152 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1153 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1154 ; GFX908-NEXT: {{ $}} 1155 ; GFX908-NEXT: bb.2: 1156 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1157 ; GFX908-NEXT: {{ $}} 1158 ; GFX908-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) 1159 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1160 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1161 ; GFX908-NEXT: {{ $}} 1162 ; GFX908-NEXT: bb.3: 1163 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1164 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_OFFEN]] 1165 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1166 %val = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1167 %zext = sext i8 %val to i32 1168 %cast = bitcast i32 %zext to float 1169 ret float %cast 1170} 1171 1172define float @raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1173 ; GFX908-LABEL: name: raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext 1174 ; GFX908: bb.0 (%ir-block.0): 1175 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1176 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1177 ; GFX908-NEXT: {{ $}} 1178 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1179 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1180 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1181 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1182 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1183 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1184 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1185 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1186 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1187 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1188 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1189 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1190 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1191 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1192 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1193 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1194 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1195 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1196 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1197 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1198 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1199 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1200 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1201 ; GFX908-NEXT: {{ $}} 1202 ; GFX908-NEXT: bb.1: 1203 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1204 ; GFX908-NEXT: {{ $}} 1205 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1206 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1207 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1208 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1209 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1210 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1211 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1212 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1213 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1214 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1215 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1216 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1217 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1218 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1219 ; GFX908-NEXT: {{ $}} 1220 ; GFX908-NEXT: bb.2: 1221 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1222 ; GFX908-NEXT: {{ $}} 1223 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) 1224 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1225 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1226 ; GFX908-NEXT: {{ $}} 1227 ; GFX908-NEXT: bb.3: 1228 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1229 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] 1230 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1231 %val = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1232 %zext = zext i16 %val to i32 1233 %cast = bitcast i32 %zext to float 1234 ret float %cast 1235} 1236 1237define float @raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1238 ; GFX908-LABEL: name: raw_ptr_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext 1239 ; GFX908: bb.0 (%ir-block.0): 1240 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1241 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1242 ; GFX908-NEXT: {{ $}} 1243 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1244 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1245 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1246 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1247 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1248 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1249 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1250 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1251 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1252 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1253 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1254 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1255 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1256 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1257 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1258 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1259 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1260 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1261 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1262 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1263 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1264 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1265 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1266 ; GFX908-NEXT: {{ $}} 1267 ; GFX908-NEXT: bb.1: 1268 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1269 ; GFX908-NEXT: {{ $}} 1270 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1271 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1272 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1273 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1274 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1275 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1276 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1277 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1278 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1279 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1280 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1281 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1282 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1283 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1284 ; GFX908-NEXT: {{ $}} 1285 ; GFX908-NEXT: bb.2: 1286 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1287 ; GFX908-NEXT: {{ $}} 1288 ; GFX908-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) 1289 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1290 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1291 ; GFX908-NEXT: {{ $}} 1292 ; GFX908-NEXT: bb.3: 1293 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1294 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_OFFEN]] 1295 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1296 %val = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1297 %sext = sext i16 %val to i32 1298 %cast = bitcast i32 %sext to float 1299 ret float %cast 1300} 1301 1302; Waterfall for rsrc 1303define half @raw_ptr_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1304 ; GFX908-LABEL: name: raw_ptr_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset 1305 ; GFX908: bb.0 (%ir-block.0): 1306 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1307 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1308 ; GFX908-NEXT: {{ $}} 1309 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1310 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1311 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1312 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1313 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1314 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1315 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1316 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1317 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1318 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1319 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1320 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1321 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1322 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1323 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1324 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1325 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1326 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1327 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1328 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1329 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1330 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1331 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1332 ; GFX908-NEXT: {{ $}} 1333 ; GFX908-NEXT: bb.1: 1334 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1335 ; GFX908-NEXT: {{ $}} 1336 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1337 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1338 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1339 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1340 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1341 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1342 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1343 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1344 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1345 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1346 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1347 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1348 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1349 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1350 ; GFX908-NEXT: {{ $}} 1351 ; GFX908-NEXT: bb.2: 1352 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1353 ; GFX908-NEXT: {{ $}} 1354 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8) 1355 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1356 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1357 ; GFX908-NEXT: {{ $}} 1358 ; GFX908-NEXT: bb.3: 1359 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1360 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_USHORT_OFFEN]] 1361 ; GFX908-NEXT: $vgpr0 = COPY [[COPY10]] 1362 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1363 %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1364 ret half %val 1365} 1366 1367; Waterfall for rsrc 1368define float @raw_ptr_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset) { 1369 ; GFX908-LABEL: name: raw_ptr_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset 1370 ; GFX908: bb.0 (%ir-block.0): 1371 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1372 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1373 ; GFX908-NEXT: {{ $}} 1374 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1375 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1376 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1377 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1378 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1379 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1380 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1381 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1382 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1383 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1384 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1385 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1386 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1387 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1388 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1389 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1390 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1391 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1392 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1393 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1394 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1395 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1396 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1397 ; GFX908-NEXT: {{ $}} 1398 ; GFX908-NEXT: bb.1: 1399 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1400 ; GFX908-NEXT: {{ $}} 1401 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1402 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1403 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1404 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1405 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1406 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1407 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1408 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1409 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1410 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1411 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1412 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1413 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1414 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1415 ; GFX908-NEXT: {{ $}} 1416 ; GFX908-NEXT: bb.2: 1417 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1418 ; GFX908-NEXT: {{ $}} 1419 ; GFX908-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from %ir.rsrc, addrspace 8) 1420 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1421 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1422 ; GFX908-NEXT: {{ $}} 1423 ; GFX908-NEXT: bb.3: 1424 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1425 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] 1426 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1427 %val = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1428 %zext = zext i8 %val to i32 1429 %cast = bitcast i32 %zext to float 1430 ret float %cast 1431} 1432 1433define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(ptr addrspace(8) %rsrc, i32 %soffset) { 1434 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0 1435 ; GFX908: bb.0 (%ir-block.0): 1436 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1437 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1438 ; GFX908-NEXT: {{ $}} 1439 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1440 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1441 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1442 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1443 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1444 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1445 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1446 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 1447 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1448 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1449 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1450 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1451 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 1452 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1453 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1454 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1455 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1456 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1457 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1458 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 1459 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1460 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1461 ; GFX908-NEXT: {{ $}} 1462 ; GFX908-NEXT: bb.1: 1463 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1464 ; GFX908-NEXT: {{ $}} 1465 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1466 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1467 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1468 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1469 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1470 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1471 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1472 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1473 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1474 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1475 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1476 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1477 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1478 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1479 ; GFX908-NEXT: {{ $}} 1480 ; GFX908-NEXT: bb.2: 1481 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1482 ; GFX908-NEXT: {{ $}} 1483 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1484 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1485 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1486 ; GFX908-NEXT: {{ $}} 1487 ; GFX908-NEXT: bb.3: 1488 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1489 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 1490 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1491 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) 1492 ret float %val 1493} 1494 1495define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(ptr addrspace(8) %rsrc, i32 %soffset) { 1496 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095 1497 ; GFX908: bb.0 (%ir-block.0): 1498 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1499 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1500 ; GFX908-NEXT: {{ $}} 1501 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1502 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1503 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1504 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1505 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1506 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1507 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1508 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 1509 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1510 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1511 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1512 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1513 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 1514 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1515 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1516 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1517 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1518 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1519 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1520 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 1521 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1522 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1523 ; GFX908-NEXT: {{ $}} 1524 ; GFX908-NEXT: bb.1: 1525 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1526 ; GFX908-NEXT: {{ $}} 1527 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1528 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1529 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1530 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1531 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1532 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1533 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1534 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1535 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1536 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1537 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1538 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1539 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1540 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1541 ; GFX908-NEXT: {{ $}} 1542 ; GFX908-NEXT: bb.2: 1543 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1544 ; GFX908-NEXT: {{ $}} 1545 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1546 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1547 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1548 ; GFX908-NEXT: {{ $}} 1549 ; GFX908-NEXT: bb.3: 1550 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1551 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 1552 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1553 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) 1554 ret float %val 1555} 1556 1557define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(ptr addrspace(8) %rsrc, i32 %soffset) { 1558 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096 1559 ; GFX908: bb.0 (%ir-block.0): 1560 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1561 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1562 ; GFX908-NEXT: {{ $}} 1563 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1564 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1565 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1566 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1567 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1568 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1569 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1570 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 1571 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1572 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1573 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1574 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1575 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 1576 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1577 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1578 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1579 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1580 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1581 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1582 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 1583 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec 1584 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1585 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1586 ; GFX908-NEXT: {{ $}} 1587 ; GFX908-NEXT: bb.1: 1588 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1589 ; GFX908-NEXT: {{ $}} 1590 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1591 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1592 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1593 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1594 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1595 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1596 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1597 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1598 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1599 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1600 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1601 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1602 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1603 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1604 ; GFX908-NEXT: {{ $}} 1605 ; GFX908-NEXT: bb.2: 1606 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1607 ; GFX908-NEXT: {{ $}} 1608 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1609 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1610 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1611 ; GFX908-NEXT: {{ $}} 1612 ; GFX908-NEXT: bb.3: 1613 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1614 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1615 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1616 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4096, i32 %soffset, i32 0) 1617 ret float %val 1618} 1619 1620define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(ptr addrspace(8) %rsrc, i32 %voffset.base, i32 %soffset) { 1621 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16 1622 ; GFX908: bb.0 (%ir-block.0): 1623 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1624 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1625 ; GFX908-NEXT: {{ $}} 1626 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1627 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1628 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1629 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1630 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1631 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1632 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1633 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1634 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1635 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1636 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1637 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1638 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1639 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1640 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1641 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1642 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1643 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1644 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1645 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1646 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1647 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1648 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1649 ; GFX908-NEXT: {{ $}} 1650 ; GFX908-NEXT: bb.1: 1651 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1652 ; GFX908-NEXT: {{ $}} 1653 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1654 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1655 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1656 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1657 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1658 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1659 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1660 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1661 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1662 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1663 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1664 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1665 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1666 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1667 ; GFX908-NEXT: {{ $}} 1668 ; GFX908-NEXT: bb.2: 1669 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1670 ; GFX908-NEXT: {{ $}} 1671 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1672 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1673 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1674 ; GFX908-NEXT: {{ $}} 1675 ; GFX908-NEXT: bb.3: 1676 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1677 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1678 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1679 %voffset = add i32 %voffset.base, 16 1680 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1681 ret float %val 1682} 1683 1684define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(ptr addrspace(8) %rsrc, i32 %voffset.base, i32 %soffset) { 1685 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 1686 ; GFX908: bb.0 (%ir-block.0): 1687 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1688 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1689 ; GFX908-NEXT: {{ $}} 1690 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1691 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1692 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1693 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1694 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1695 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1696 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1697 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1698 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1699 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1700 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1701 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1702 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1703 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1704 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1705 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1706 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1707 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1708 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1709 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1710 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1711 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1712 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1713 ; GFX908-NEXT: {{ $}} 1714 ; GFX908-NEXT: bb.1: 1715 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1716 ; GFX908-NEXT: {{ $}} 1717 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1718 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1719 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1720 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1721 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1722 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1723 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1724 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1725 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1726 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1727 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1728 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1729 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1730 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1731 ; GFX908-NEXT: {{ $}} 1732 ; GFX908-NEXT: bb.2: 1733 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1734 ; GFX908-NEXT: {{ $}} 1735 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1736 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1737 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1738 ; GFX908-NEXT: {{ $}} 1739 ; GFX908-NEXT: bb.3: 1740 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1741 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1742 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1743 %voffset = add i32 %voffset.base, 4095 1744 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1745 ret float %val 1746} 1747 1748define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(ptr addrspace(8) %rsrc, i32 %voffset.base, i32 %soffset) { 1749 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096 1750 ; GFX908: bb.0 (%ir-block.0): 1751 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1752 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1753 ; GFX908-NEXT: {{ $}} 1754 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1755 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1756 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1757 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1758 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1759 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1760 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1761 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1762 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1763 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1764 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1765 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1766 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1767 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1768 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1769 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1770 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1771 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1772 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1773 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1774 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1775 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1776 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 1777 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1778 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1779 ; GFX908-NEXT: {{ $}} 1780 ; GFX908-NEXT: bb.1: 1781 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1782 ; GFX908-NEXT: {{ $}} 1783 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1784 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1785 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1786 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1787 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1788 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1789 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1790 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1791 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1792 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1793 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1794 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1795 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1796 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1797 ; GFX908-NEXT: {{ $}} 1798 ; GFX908-NEXT: bb.2: 1799 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1800 ; GFX908-NEXT: {{ $}} 1801 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1802 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1803 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1804 ; GFX908-NEXT: {{ $}} 1805 ; GFX908-NEXT: bb.3: 1806 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1807 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1808 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1809 %voffset = add i32 %voffset.base, 4096 1810 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1811 ret float %val 1812} 1813 1814define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(ptr addrspace(8) %rsrc, i32 %voffset) { 1815 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095 1816 ; GFX908: bb.0 (%ir-block.0): 1817 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1818 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1819 ; GFX908-NEXT: {{ $}} 1820 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1821 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1822 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1823 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1824 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1825 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1826 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1827 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 1828 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1829 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1830 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1831 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1832 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 1833 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1834 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1835 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1836 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1837 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1838 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1839 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 1840 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 1841 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1842 ; GFX908-NEXT: {{ $}} 1843 ; GFX908-NEXT: bb.1: 1844 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1845 ; GFX908-NEXT: {{ $}} 1846 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1847 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1848 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1849 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1850 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1851 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1852 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1853 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1854 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1855 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1856 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1857 ; GFX908-NEXT: {{ $}} 1858 ; GFX908-NEXT: bb.2: 1859 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1860 ; GFX908-NEXT: {{ $}} 1861 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], killed [[REG_SEQUENCE5]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1862 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1863 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1864 ; GFX908-NEXT: {{ $}} 1865 ; GFX908-NEXT: bb.3: 1866 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1867 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1868 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1869 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0) 1870 ret float %val 1871} 1872 1873define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(ptr addrspace(8) %rsrc, i32 %voffset) { 1874 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096 1875 ; GFX908: bb.0 (%ir-block.0): 1876 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1877 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1878 ; GFX908-NEXT: {{ $}} 1879 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1880 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1881 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1882 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1883 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1884 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1885 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1886 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 1887 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1888 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1889 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1890 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1891 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 1892 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1893 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1894 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1895 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1896 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1897 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1898 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 1899 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1900 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1901 ; GFX908-NEXT: {{ $}} 1902 ; GFX908-NEXT: bb.1: 1903 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1904 ; GFX908-NEXT: {{ $}} 1905 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1906 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1907 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1908 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1909 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1910 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1911 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1912 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1913 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1914 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1915 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1916 ; GFX908-NEXT: {{ $}} 1917 ; GFX908-NEXT: bb.2: 1918 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1919 ; GFX908-NEXT: {{ $}} 1920 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], killed [[REG_SEQUENCE5]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1921 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1922 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1923 ; GFX908-NEXT: {{ $}} 1924 ; GFX908-NEXT: bb.3: 1925 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1926 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1927 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1928 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0) 1929 ret float %val 1930} 1931 1932define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset.base) { 1933 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16 1934 ; GFX908: bb.0 (%ir-block.0): 1935 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1936 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1937 ; GFX908-NEXT: {{ $}} 1938 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1939 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1940 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1941 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1942 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1943 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1944 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1945 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1946 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 1947 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 1948 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 1949 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1950 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1951 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 1952 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 1953 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 1954 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1955 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1956 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1957 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1958 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 1959 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 1960 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1961 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1962 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1963 ; GFX908-NEXT: {{ $}} 1964 ; GFX908-NEXT: bb.1: 1965 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1966 ; GFX908-NEXT: {{ $}} 1967 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 1968 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 1969 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1970 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 1971 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 1972 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 1973 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1974 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 1975 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1976 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1977 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1978 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1979 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1980 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1981 ; GFX908-NEXT: {{ $}} 1982 ; GFX908-NEXT: bb.2: 1983 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1984 ; GFX908-NEXT: {{ $}} 1985 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 1986 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1987 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1988 ; GFX908-NEXT: {{ $}} 1989 ; GFX908-NEXT: bb.3: 1990 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1991 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1992 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1993 %soffset = add i32 %soffset.base, 16 1994 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 1995 ret float %val 1996} 1997 1998define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset.base) { 1999 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095 2000 ; GFX908: bb.0 (%ir-block.0): 2001 ; GFX908-NEXT: successors: %bb.1(0x80000000) 2002 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2003 ; GFX908-NEXT: {{ $}} 2004 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 2005 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 2006 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2007 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2008 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2009 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2010 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2011 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2012 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 2013 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 2014 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 2015 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2016 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2017 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 2018 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2019 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2020 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2021 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2022 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2023 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2024 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 2025 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 2026 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 2027 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 2028 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 2029 ; GFX908-NEXT: {{ $}} 2030 ; GFX908-NEXT: bb.1: 2031 ; GFX908-NEXT: successors: %bb.2(0x80000000) 2032 ; GFX908-NEXT: {{ $}} 2033 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 2034 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 2035 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2036 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 2037 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 2038 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 2039 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2040 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 2041 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 2042 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2043 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 2044 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 2045 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 2046 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 2047 ; GFX908-NEXT: {{ $}} 2048 ; GFX908-NEXT: bb.2: 2049 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 2050 ; GFX908-NEXT: {{ $}} 2051 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 2052 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2053 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 2054 ; GFX908-NEXT: {{ $}} 2055 ; GFX908-NEXT: bb.3: 2056 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 2057 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2058 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 2059 %soffset = add i32 %soffset.base, 4095 2060 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 2061 ret float %val 2062} 2063 2064define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset.base) { 2065 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096 2066 ; GFX908: bb.0 (%ir-block.0): 2067 ; GFX908-NEXT: successors: %bb.1(0x80000000) 2068 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2069 ; GFX908-NEXT: {{ $}} 2070 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 2071 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 2072 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2073 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2074 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2075 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2076 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2077 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2078 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 2079 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 2080 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 2081 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2082 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2083 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 2084 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2085 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2086 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2087 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2088 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2089 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2090 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 2091 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2092 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 2093 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 2094 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 2095 ; GFX908-NEXT: {{ $}} 2096 ; GFX908-NEXT: bb.1: 2097 ; GFX908-NEXT: successors: %bb.2(0x80000000) 2098 ; GFX908-NEXT: {{ $}} 2099 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 2100 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 2101 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2102 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 2103 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 2104 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 2105 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2106 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 2107 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 2108 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2109 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 2110 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 2111 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 2112 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 2113 ; GFX908-NEXT: {{ $}} 2114 ; GFX908-NEXT: bb.2: 2115 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 2116 ; GFX908-NEXT: {{ $}} 2117 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 2118 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2119 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 2120 ; GFX908-NEXT: {{ $}} 2121 ; GFX908-NEXT: bb.3: 2122 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 2123 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2124 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 2125 %soffset = add i32 %soffset.base, 4096 2126 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 2127 ret float %val 2128} 2129 2130; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 2131define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset.base) { 2132 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000 2133 ; GFX908: bb.0 (%ir-block.0): 2134 ; GFX908-NEXT: successors: %bb.1(0x80000000) 2135 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2136 ; GFX908-NEXT: {{ $}} 2137 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 2138 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 2139 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2140 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2141 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2142 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2143 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2144 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2145 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 2146 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 2147 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 2148 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2149 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2150 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 2151 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2152 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2153 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2154 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2155 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2156 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2157 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 2158 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 2159 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 2160 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 2161 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 2162 ; GFX908-NEXT: {{ $}} 2163 ; GFX908-NEXT: bb.1: 2164 ; GFX908-NEXT: successors: %bb.2(0x80000000) 2165 ; GFX908-NEXT: {{ $}} 2166 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 2167 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 2168 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2169 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 2170 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 2171 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 2172 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2173 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 2174 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 2175 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2176 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 2177 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 2178 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 2179 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 2180 ; GFX908-NEXT: {{ $}} 2181 ; GFX908-NEXT: bb.2: 2182 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 2183 ; GFX908-NEXT: {{ $}} 2184 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 2185 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2186 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 2187 ; GFX908-NEXT: {{ $}} 2188 ; GFX908-NEXT: bb.3: 2189 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 2190 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2191 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 2192 %soffset = add i32 %soffset.base, 5000 2193 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 2194 ret float %val 2195} 2196 2197; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 2198define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(ptr addrspace(8) %rsrc, i32 %voffset.base, i32 %soffset) { 2199 ; GFX908-LABEL: name: raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000 2200 ; GFX908: bb.0 (%ir-block.0): 2201 ; GFX908-NEXT: successors: %bb.1(0x80000000) 2202 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 2203 ; GFX908-NEXT: {{ $}} 2204 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 2205 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 2206 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 2207 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 2208 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 2209 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 2210 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2211 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2212 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 2213 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 2214 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 2215 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2216 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2217 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 2218 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 2219 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 2220 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2221 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2222 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2223 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 2224 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 2225 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 2226 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 2227 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 2228 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 2229 ; GFX908-NEXT: {{ $}} 2230 ; GFX908-NEXT: bb.1: 2231 ; GFX908-NEXT: successors: %bb.2(0x80000000) 2232 ; GFX908-NEXT: {{ $}} 2233 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub0, implicit $exec 2234 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub1, implicit $exec 2235 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 2236 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]].sub0_sub1, implicit $exec 2237 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub2, implicit $exec 2238 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE2]].sub3, implicit $exec 2239 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 2240 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE2]].sub2_sub3, implicit $exec 2241 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 2242 ; GFX908-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 2243 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 2244 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 2245 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 2246 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 2247 ; GFX908-NEXT: {{ $}} 2248 ; GFX908-NEXT: bb.2: 2249 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 2250 ; GFX908-NEXT: {{ $}} 2251 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE5]], killed [[V_READFIRSTLANE_B32_4]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8) 2252 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 2253 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 2254 ; GFX908-NEXT: {{ $}} 2255 ; GFX908-NEXT: bb.3: 2256 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 2257 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 2258 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 2259 %voffset = add i32 %voffset.base, 5000 2260 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 2261 ret float %val 2262} 2263 2264declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) 2265declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg) 2266declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg) 2267declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg) 2268 2269declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg) 2270declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg) 2271declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg) 2272declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg) 2273 2274declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32 immarg) 2275declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32 immarg) 2276