1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s 3 4; Natural mapping 5define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 6 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 7 ; GFX908: bb.0 (%ir-block.0): 8 ; GFX908-NEXT: successors: %bb.1(0x80000000) 9 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 10 ; GFX908-NEXT: {{ $}} 11 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 12 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 13 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 14 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 16 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 17 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 18 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 19 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 20 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 21 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 22 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 23 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 24 ; GFX908-NEXT: {{ $}} 25 ; GFX908-NEXT: bb.1: 26 ; GFX908-NEXT: successors: %bb.2(0x80000000) 27 ; GFX908-NEXT: {{ $}} 28 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 29 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 30 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 31 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 32 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 33 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 34 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 35 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 36 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 37 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 38 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 39 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 40 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 41 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 42 ; GFX908-NEXT: {{ $}} 43 ; GFX908-NEXT: bb.2: 44 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 45 ; GFX908-NEXT: {{ $}} 46 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 47 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 48 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 49 ; GFX908-NEXT: {{ $}} 50 ; GFX908-NEXT: bb.3: 51 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 52 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 53 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 54 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 55 ret float %val 56} 57 58; Copies for VGPR arguments 59define float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 60 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset 61 ; GFX908: bb.0 (%ir-block.0): 62 ; GFX908-NEXT: successors: %bb.1(0x80000000) 63 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 64 ; GFX908-NEXT: {{ $}} 65 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 66 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 67 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 68 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 69 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 70 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 71 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 72 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 73 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 74 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 75 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 76 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 77 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 78 ; GFX908-NEXT: {{ $}} 79 ; GFX908-NEXT: bb.1: 80 ; GFX908-NEXT: successors: %bb.2(0x80000000) 81 ; GFX908-NEXT: {{ $}} 82 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 83 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 84 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 85 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 86 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 87 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 88 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 89 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 90 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 91 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 92 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 93 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 94 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 95 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 96 ; GFX908-NEXT: {{ $}} 97 ; GFX908-NEXT: bb.2: 98 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 99 ; GFX908-NEXT: {{ $}} 100 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 101 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 102 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 103 ; GFX908-NEXT: {{ $}} 104 ; GFX908-NEXT: bb.3: 105 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 106 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 107 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 108 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 109 ret float %val 110} 111 112; Waterfall for rsrc 113define float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 114 ; GFX908-LABEL: name: raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset 115 ; GFX908: bb.0 (%ir-block.0): 116 ; GFX908-NEXT: successors: %bb.1(0x80000000) 117 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 118 ; GFX908-NEXT: {{ $}} 119 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 120 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 121 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 122 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 123 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 124 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 125 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 126 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 127 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 128 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 129 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 130 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 131 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 132 ; GFX908-NEXT: {{ $}} 133 ; GFX908-NEXT: bb.1: 134 ; GFX908-NEXT: successors: %bb.2(0x80000000) 135 ; GFX908-NEXT: {{ $}} 136 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 137 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 138 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 139 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 140 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 141 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 142 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 143 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 144 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 145 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 146 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 147 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 148 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 149 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 150 ; GFX908-NEXT: {{ $}} 151 ; GFX908-NEXT: bb.2: 152 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 153 ; GFX908-NEXT: {{ $}} 154 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 155 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 156 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 157 ; GFX908-NEXT: {{ $}} 158 ; GFX908-NEXT: bb.3: 159 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 160 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 161 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 162 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 163 ret float %val 164} 165 166; Waterfall for rsrc and soffset 167define float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 168 ; GFX908-LABEL: name: raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset 169 ; GFX908: bb.0 (%ir-block.0): 170 ; GFX908-NEXT: successors: %bb.1(0x80000000) 171 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 172 ; GFX908-NEXT: {{ $}} 173 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 174 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 175 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 176 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 177 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 178 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 179 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 180 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 181 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 182 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 183 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 184 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 185 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 186 ; GFX908-NEXT: {{ $}} 187 ; GFX908-NEXT: bb.1: 188 ; GFX908-NEXT: successors: %bb.2(0x80000000) 189 ; GFX908-NEXT: {{ $}} 190 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 191 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 192 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 193 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 194 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 195 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 196 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 197 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 198 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 199 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 200 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 201 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 202 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 203 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 204 ; GFX908-NEXT: {{ $}} 205 ; GFX908-NEXT: bb.2: 206 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 207 ; GFX908-NEXT: {{ $}} 208 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 209 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 210 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 211 ; GFX908-NEXT: {{ $}} 212 ; GFX908-NEXT: bb.3: 213 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 214 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 215 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 216 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 217 ret float %val 218} 219 220; Natural mapping + glc 221define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 222 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 223 ; GFX908: bb.0 (%ir-block.0): 224 ; GFX908-NEXT: successors: %bb.1(0x80000000) 225 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 226 ; GFX908-NEXT: {{ $}} 227 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 228 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 229 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 230 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 231 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 232 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 233 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 234 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 235 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 236 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 237 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 238 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 239 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 240 ; GFX908-NEXT: {{ $}} 241 ; GFX908-NEXT: bb.1: 242 ; GFX908-NEXT: successors: %bb.2(0x80000000) 243 ; GFX908-NEXT: {{ $}} 244 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 245 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 246 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 247 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 248 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 249 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 250 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 251 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 252 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 253 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 254 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 255 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 256 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 257 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 258 ; GFX908-NEXT: {{ $}} 259 ; GFX908-NEXT: bb.2: 260 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 261 ; GFX908-NEXT: {{ $}} 262 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 263 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 264 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 265 ; GFX908-NEXT: {{ $}} 266 ; GFX908-NEXT: bb.3: 267 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 268 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 269 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 270 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) 271 ret float %val 272} 273 274; Natural mapping + slc 275define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 276 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 277 ; GFX908: bb.0 (%ir-block.0): 278 ; GFX908-NEXT: successors: %bb.1(0x80000000) 279 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 280 ; GFX908-NEXT: {{ $}} 281 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 282 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 283 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 284 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 285 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 286 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 287 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 288 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 289 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 290 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 291 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 292 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 293 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 294 ; GFX908-NEXT: {{ $}} 295 ; GFX908-NEXT: bb.1: 296 ; GFX908-NEXT: successors: %bb.2(0x80000000) 297 ; GFX908-NEXT: {{ $}} 298 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 299 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 300 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 301 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 302 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 303 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 304 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 305 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 306 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 307 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 308 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 309 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 310 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 311 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 312 ; GFX908-NEXT: {{ $}} 313 ; GFX908-NEXT: bb.2: 314 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 315 ; GFX908-NEXT: {{ $}} 316 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 317 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 318 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 319 ; GFX908-NEXT: {{ $}} 320 ; GFX908-NEXT: bb.3: 321 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 322 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 323 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 324 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) 325 ret float %val 326} 327 328; Natural mapping + dlc 329define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 330 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 331 ; GFX908: bb.0 (%ir-block.0): 332 ; GFX908-NEXT: successors: %bb.1(0x80000000) 333 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 334 ; GFX908-NEXT: {{ $}} 335 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 336 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 337 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 338 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 339 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 340 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 341 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 342 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 343 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 344 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 345 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 346 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 347 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 348 ; GFX908-NEXT: {{ $}} 349 ; GFX908-NEXT: bb.1: 350 ; GFX908-NEXT: successors: %bb.2(0x80000000) 351 ; GFX908-NEXT: {{ $}} 352 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 353 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 354 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 355 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 356 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 357 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 358 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 359 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 360 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 361 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 362 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 363 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 364 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 365 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 366 ; GFX908-NEXT: {{ $}} 367 ; GFX908-NEXT: bb.2: 368 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 369 ; GFX908-NEXT: {{ $}} 370 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 371 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 372 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 373 ; GFX908-NEXT: {{ $}} 374 ; GFX908-NEXT: bb.3: 375 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 376 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 377 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 378 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) 379 ret float %val 380} 381 382; Natural mapping + slc + dlc 383define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 384 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_dlc 385 ; GFX908: bb.0 (%ir-block.0): 386 ; GFX908-NEXT: successors: %bb.1(0x80000000) 387 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 388 ; GFX908-NEXT: {{ $}} 389 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 390 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 391 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 392 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 393 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 394 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 395 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 396 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 397 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 398 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 399 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 400 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 401 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 402 ; GFX908-NEXT: {{ $}} 403 ; GFX908-NEXT: bb.1: 404 ; GFX908-NEXT: successors: %bb.2(0x80000000) 405 ; GFX908-NEXT: {{ $}} 406 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 407 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 408 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 409 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 410 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 411 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 412 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 413 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 414 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 415 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 416 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 417 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 418 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 419 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 420 ; GFX908-NEXT: {{ $}} 421 ; GFX908-NEXT: bb.2: 422 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 423 ; GFX908-NEXT: {{ $}} 424 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 6, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 425 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 426 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 427 ; GFX908-NEXT: {{ $}} 428 ; GFX908-NEXT: bb.3: 429 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 430 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 431 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 432 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) 433 ret float %val 434} 435 436; Natural mapping + glc + dlc 437define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 438 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_dlc 439 ; GFX908: bb.0 (%ir-block.0): 440 ; GFX908-NEXT: successors: %bb.1(0x80000000) 441 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 442 ; GFX908-NEXT: {{ $}} 443 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 444 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 445 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 446 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 447 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 448 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 449 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 450 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 451 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 452 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 453 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 454 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 455 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 456 ; GFX908-NEXT: {{ $}} 457 ; GFX908-NEXT: bb.1: 458 ; GFX908-NEXT: successors: %bb.2(0x80000000) 459 ; GFX908-NEXT: {{ $}} 460 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 461 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 462 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 463 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 464 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 465 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 466 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 467 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 468 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 469 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 470 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 471 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 472 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 473 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 474 ; GFX908-NEXT: {{ $}} 475 ; GFX908-NEXT: bb.2: 476 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 477 ; GFX908-NEXT: {{ $}} 478 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 5, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 479 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 480 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 481 ; GFX908-NEXT: {{ $}} 482 ; GFX908-NEXT: bb.3: 483 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 484 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 485 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 486 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) 487 ret float %val 488} 489 490; Natural mapping + glc + slc + dlc 491define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 492 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc_slc_dlc 493 ; GFX908: bb.0 (%ir-block.0): 494 ; GFX908-NEXT: successors: %bb.1(0x80000000) 495 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 496 ; GFX908-NEXT: {{ $}} 497 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 498 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 499 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 500 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 501 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 502 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 503 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 504 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 505 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 506 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 507 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 508 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 509 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 510 ; GFX908-NEXT: {{ $}} 511 ; GFX908-NEXT: bb.1: 512 ; GFX908-NEXT: successors: %bb.2(0x80000000) 513 ; GFX908-NEXT: {{ $}} 514 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 515 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 516 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 517 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 518 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 519 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 520 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 521 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 522 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 523 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 524 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 525 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 526 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 527 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 528 ; GFX908-NEXT: {{ $}} 529 ; GFX908-NEXT: bb.2: 530 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 531 ; GFX908-NEXT: {{ $}} 532 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 7, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 533 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 534 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 535 ; GFX908-NEXT: {{ $}} 536 ; GFX908-NEXT: bb.3: 537 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 538 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 539 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 540 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) 541 ret float %val 542} 543 544; Natural mapping 545define <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 546 ; GFX908-LABEL: name: raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 547 ; GFX908: bb.0 (%ir-block.0): 548 ; GFX908-NEXT: successors: %bb.1(0x80000000) 549 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 550 ; GFX908-NEXT: {{ $}} 551 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 552 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 553 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 554 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 555 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 556 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 557 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 558 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 559 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 560 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 561 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 562 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 563 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 564 ; GFX908-NEXT: {{ $}} 565 ; GFX908-NEXT: bb.1: 566 ; GFX908-NEXT: successors: %bb.2(0x80000000) 567 ; GFX908-NEXT: {{ $}} 568 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 569 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 570 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 571 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 572 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 573 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 574 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 575 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 576 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 577 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 578 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 579 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 580 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 581 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 582 ; GFX908-NEXT: {{ $}} 583 ; GFX908-NEXT: bb.2: 584 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 585 ; GFX908-NEXT: {{ $}} 586 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8) 587 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 588 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 589 ; GFX908-NEXT: {{ $}} 590 ; GFX908-NEXT: bb.3: 591 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 592 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 593 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 594 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 595 ; GFX908-NEXT: $vgpr1 = COPY [[COPY7]] 596 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 597 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 598 ret <2 x float> %val 599} 600 601define <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 602 ; GFX908-LABEL: name: raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 603 ; GFX908: bb.0 (%ir-block.0): 604 ; GFX908-NEXT: successors: %bb.1(0x80000000) 605 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 606 ; GFX908-NEXT: {{ $}} 607 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 608 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 609 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 610 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 611 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 612 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 613 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 614 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 615 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 616 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 617 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 618 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 619 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 620 ; GFX908-NEXT: {{ $}} 621 ; GFX908-NEXT: bb.1: 622 ; GFX908-NEXT: successors: %bb.2(0x80000000) 623 ; GFX908-NEXT: {{ $}} 624 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 625 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 626 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 627 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 628 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 629 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 630 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 631 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 632 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 633 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 634 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 635 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 636 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 637 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 638 ; GFX908-NEXT: {{ $}} 639 ; GFX908-NEXT: bb.2: 640 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 641 ; GFX908-NEXT: {{ $}} 642 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8) 643 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 644 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 645 ; GFX908-NEXT: {{ $}} 646 ; GFX908-NEXT: bb.3: 647 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 648 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 649 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 650 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 651 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 652 ; GFX908-NEXT: $vgpr1 = COPY [[COPY7]] 653 ; GFX908-NEXT: $vgpr2 = COPY [[COPY8]] 654 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 655 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 656 ret <3 x float> %val 657} 658 659define <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 660 ; GFX908-LABEL: name: raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 661 ; GFX908: bb.0 (%ir-block.0): 662 ; GFX908-NEXT: successors: %bb.1(0x80000000) 663 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 664 ; GFX908-NEXT: {{ $}} 665 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 666 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 667 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 668 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 669 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 670 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 671 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 672 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 673 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 674 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 675 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 676 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 677 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 678 ; GFX908-NEXT: {{ $}} 679 ; GFX908-NEXT: bb.1: 680 ; GFX908-NEXT: successors: %bb.2(0x80000000) 681 ; GFX908-NEXT: {{ $}} 682 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 683 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 684 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 685 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 686 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 687 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 688 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 689 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 690 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 691 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 692 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 693 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 694 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 695 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 696 ; GFX908-NEXT: {{ $}} 697 ; GFX908-NEXT: bb.2: 698 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 699 ; GFX908-NEXT: {{ $}} 700 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) 701 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 702 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 703 ; GFX908-NEXT: {{ $}} 704 ; GFX908-NEXT: bb.3: 705 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 706 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 707 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 708 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 709 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 710 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 711 ; GFX908-NEXT: $vgpr1 = COPY [[COPY7]] 712 ; GFX908-NEXT: $vgpr2 = COPY [[COPY8]] 713 ; GFX908-NEXT: $vgpr3 = COPY [[COPY9]] 714 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 715 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 716 ret <4 x float> %val 717} 718 719define half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 720 ; GFX908-LABEL: name: raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 721 ; GFX908: bb.0 (%ir-block.0): 722 ; GFX908-NEXT: successors: %bb.1(0x80000000) 723 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 724 ; GFX908-NEXT: {{ $}} 725 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 726 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 727 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 728 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 729 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 730 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 731 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 732 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 733 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 734 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 735 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 736 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 737 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 738 ; GFX908-NEXT: {{ $}} 739 ; GFX908-NEXT: bb.1: 740 ; GFX908-NEXT: successors: %bb.2(0x80000000) 741 ; GFX908-NEXT: {{ $}} 742 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 743 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 744 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 745 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 746 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 747 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 748 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 749 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 750 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 751 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 752 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 753 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 754 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 755 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 756 ; GFX908-NEXT: {{ $}} 757 ; GFX908-NEXT: bb.2: 758 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 759 ; GFX908-NEXT: {{ $}} 760 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) 761 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 762 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 763 ; GFX908-NEXT: {{ $}} 764 ; GFX908-NEXT: bb.3: 765 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 766 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_USHORT_OFFEN]] 767 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 768 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 769 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 770 ret half %val 771} 772 773define <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 774 ; GFX908-LABEL: name: raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 775 ; GFX908: bb.0 (%ir-block.0): 776 ; GFX908-NEXT: successors: %bb.1(0x80000000) 777 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 778 ; GFX908-NEXT: {{ $}} 779 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 780 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 781 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 782 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 783 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 784 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 785 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 786 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 787 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 788 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 789 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 790 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 791 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 792 ; GFX908-NEXT: {{ $}} 793 ; GFX908-NEXT: bb.1: 794 ; GFX908-NEXT: successors: %bb.2(0x80000000) 795 ; GFX908-NEXT: {{ $}} 796 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 797 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 798 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 799 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 800 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 801 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 802 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 803 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 804 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 805 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 806 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 807 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 808 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 809 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 810 ; GFX908-NEXT: {{ $}} 811 ; GFX908-NEXT: bb.2: 812 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 813 ; GFX908-NEXT: {{ $}} 814 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 815 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 816 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 817 ; GFX908-NEXT: {{ $}} 818 ; GFX908-NEXT: bb.3: 819 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 820 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 821 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 822 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 823 ret <2 x half> %val 824} 825 826define <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 827 ; GFX908-LABEL: name: raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 828 ; GFX908: bb.0 (%ir-block.0): 829 ; GFX908-NEXT: successors: %bb.1(0x80000000) 830 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 831 ; GFX908-NEXT: {{ $}} 832 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 833 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 834 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 835 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 836 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 837 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 838 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 839 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 840 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 841 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 842 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 843 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 844 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 845 ; GFX908-NEXT: {{ $}} 846 ; GFX908-NEXT: bb.1: 847 ; GFX908-NEXT: successors: %bb.2(0x80000000) 848 ; GFX908-NEXT: {{ $}} 849 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 850 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 851 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 852 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 853 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 854 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 855 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 856 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 857 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 858 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 859 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 860 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 861 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 862 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 863 ; GFX908-NEXT: {{ $}} 864 ; GFX908-NEXT: bb.2: 865 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 866 ; GFX908-NEXT: {{ $}} 867 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8) 868 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 869 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 870 ; GFX908-NEXT: {{ $}} 871 ; GFX908-NEXT: bb.3: 872 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 873 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 874 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 875 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 876 ; GFX908-NEXT: $vgpr1 = COPY [[COPY7]] 877 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 878 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 879 ret <4 x half> %val 880} 881 882define float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 883 ; GFX908-LABEL: name: raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext 884 ; GFX908: bb.0 (%ir-block.0): 885 ; GFX908-NEXT: successors: %bb.1(0x80000000) 886 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 887 ; GFX908-NEXT: {{ $}} 888 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 889 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 890 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 891 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 892 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 893 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 894 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 895 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 896 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 897 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 898 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 899 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 900 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 901 ; GFX908-NEXT: {{ $}} 902 ; GFX908-NEXT: bb.1: 903 ; GFX908-NEXT: successors: %bb.2(0x80000000) 904 ; GFX908-NEXT: {{ $}} 905 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 906 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 907 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 908 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 909 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 910 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 911 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 912 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 913 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 914 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 915 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 916 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 917 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 918 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 919 ; GFX908-NEXT: {{ $}} 920 ; GFX908-NEXT: bb.2: 921 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 922 ; GFX908-NEXT: {{ $}} 923 ; GFX908-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) 924 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 925 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 926 ; GFX908-NEXT: {{ $}} 927 ; GFX908-NEXT: bb.3: 928 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 929 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] 930 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 931 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 932 %zext = zext i8 %val to i32 933 %cast = bitcast i32 %zext to float 934 ret float %cast 935} 936 937define float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 938 ; GFX908-LABEL: name: raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext 939 ; GFX908: bb.0 (%ir-block.0): 940 ; GFX908-NEXT: successors: %bb.1(0x80000000) 941 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 942 ; GFX908-NEXT: {{ $}} 943 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 944 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 945 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 946 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 947 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 948 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 949 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 950 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 951 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 952 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 953 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 954 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 955 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 956 ; GFX908-NEXT: {{ $}} 957 ; GFX908-NEXT: bb.1: 958 ; GFX908-NEXT: successors: %bb.2(0x80000000) 959 ; GFX908-NEXT: {{ $}} 960 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 961 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 962 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 963 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 964 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 965 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 966 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 967 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 968 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 969 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 970 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 971 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 972 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 973 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 974 ; GFX908-NEXT: {{ $}} 975 ; GFX908-NEXT: bb.2: 976 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 977 ; GFX908-NEXT: {{ $}} 978 ; GFX908-NEXT: [[BUFFER_LOAD_SBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) 979 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 980 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 981 ; GFX908-NEXT: {{ $}} 982 ; GFX908-NEXT: bb.3: 983 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 984 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_OFFEN]] 985 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 986 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 987 %zext = sext i8 %val to i32 988 %cast = bitcast i32 %zext to float 989 ret float %cast 990} 991 992define float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 993 ; GFX908-LABEL: name: raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_zext 994 ; GFX908: bb.0 (%ir-block.0): 995 ; GFX908-NEXT: successors: %bb.1(0x80000000) 996 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 997 ; GFX908-NEXT: {{ $}} 998 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 999 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1000 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1001 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1002 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1003 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1004 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1005 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1006 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1007 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1008 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1009 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1010 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1011 ; GFX908-NEXT: {{ $}} 1012 ; GFX908-NEXT: bb.1: 1013 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1014 ; GFX908-NEXT: {{ $}} 1015 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1016 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1017 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1018 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1019 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1020 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1021 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1022 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1023 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1024 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1025 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1026 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1027 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1028 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1029 ; GFX908-NEXT: {{ $}} 1030 ; GFX908-NEXT: bb.2: 1031 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1032 ; GFX908-NEXT: {{ $}} 1033 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) 1034 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1035 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1036 ; GFX908-NEXT: {{ $}} 1037 ; GFX908-NEXT: bb.3: 1038 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1039 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] 1040 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1041 %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1042 %zext = zext i16 %val to i32 1043 %cast = bitcast i32 %zext to float 1044 ret float %cast 1045} 1046 1047define float @raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 1048 ; GFX908-LABEL: name: raw_buffer_load_i16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_sext 1049 ; GFX908: bb.0 (%ir-block.0): 1050 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1051 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1052 ; GFX908-NEXT: {{ $}} 1053 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1054 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1055 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1056 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1057 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1058 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1059 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1060 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1061 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1062 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1063 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1064 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1065 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1066 ; GFX908-NEXT: {{ $}} 1067 ; GFX908-NEXT: bb.1: 1068 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1069 ; GFX908-NEXT: {{ $}} 1070 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1071 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1072 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1073 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1074 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1075 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1076 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1077 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1078 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1079 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1080 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1081 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1082 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1083 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1084 ; GFX908-NEXT: {{ $}} 1085 ; GFX908-NEXT: bb.2: 1086 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1087 ; GFX908-NEXT: {{ $}} 1088 ; GFX908-NEXT: [[BUFFER_LOAD_SSHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) 1089 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1090 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1091 ; GFX908-NEXT: {{ $}} 1092 ; GFX908-NEXT: bb.3: 1093 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1094 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_OFFEN]] 1095 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1096 %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1097 %sext = sext i16 %val to i32 1098 %cast = bitcast i32 %sext to float 1099 ret float %cast 1100} 1101 1102; Waterfall for rsrc 1103define half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 1104 ; GFX908-LABEL: name: raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffset 1105 ; GFX908: bb.0 (%ir-block.0): 1106 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1107 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1108 ; GFX908-NEXT: {{ $}} 1109 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1110 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1111 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1112 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1113 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1114 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1115 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1116 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1117 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1118 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1119 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1120 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1121 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1122 ; GFX908-NEXT: {{ $}} 1123 ; GFX908-NEXT: bb.1: 1124 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1125 ; GFX908-NEXT: {{ $}} 1126 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1127 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1128 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1129 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1130 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1131 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1132 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1133 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1134 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1135 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1136 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1137 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1138 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1139 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1140 ; GFX908-NEXT: {{ $}} 1141 ; GFX908-NEXT: bb.2: 1142 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1143 ; GFX908-NEXT: {{ $}} 1144 ; GFX908-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8) 1145 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1146 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1147 ; GFX908-NEXT: {{ $}} 1148 ; GFX908-NEXT: bb.3: 1149 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1150 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_LOAD_USHORT_OFFEN]] 1151 ; GFX908-NEXT: $vgpr0 = COPY [[COPY6]] 1152 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1153 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1154 ret half %val 1155} 1156 1157; Waterfall for rsrc 1158define float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 1159 ; GFX908-LABEL: name: raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffset 1160 ; GFX908: bb.0 (%ir-block.0): 1161 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1162 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1163 ; GFX908-NEXT: {{ $}} 1164 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1165 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1166 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1167 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1168 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1169 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1170 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1171 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1172 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1173 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1174 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1175 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1176 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1177 ; GFX908-NEXT: {{ $}} 1178 ; GFX908-NEXT: bb.1: 1179 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1180 ; GFX908-NEXT: {{ $}} 1181 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1182 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1183 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1184 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1185 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1186 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1187 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1188 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1189 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1190 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1191 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1192 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1193 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1194 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1195 ; GFX908-NEXT: {{ $}} 1196 ; GFX908-NEXT: bb.2: 1197 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1198 ; GFX908-NEXT: {{ $}} 1199 ; GFX908-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8) 1200 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1201 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1202 ; GFX908-NEXT: {{ $}} 1203 ; GFX908-NEXT: bb.3: 1204 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1205 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] 1206 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1207 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1208 %zext = zext i8 %val to i32 1209 %cast = bitcast i32 %zext to float 1210 ret float %cast 1211} 1212 1213define float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(<4 x i32> %rsrc, i32 %soffset) { 1214 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0 1215 ; GFX908: bb.0 (%ir-block.0): 1216 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1217 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1218 ; GFX908-NEXT: {{ $}} 1219 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1220 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1221 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1222 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1223 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1224 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1225 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1226 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1227 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1228 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1229 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1230 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1231 ; GFX908-NEXT: {{ $}} 1232 ; GFX908-NEXT: bb.1: 1233 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1234 ; GFX908-NEXT: {{ $}} 1235 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1236 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1237 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1238 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1239 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1240 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1241 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1242 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1243 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1244 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1245 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1246 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1247 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1248 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1249 ; GFX908-NEXT: {{ $}} 1250 ; GFX908-NEXT: bb.2: 1251 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1252 ; GFX908-NEXT: {{ $}} 1253 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1254 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1255 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1256 ; GFX908-NEXT: {{ $}} 1257 ; GFX908-NEXT: bb.3: 1258 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1259 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 1260 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1261 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) 1262 ret float %val 1263} 1264 1265define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(<4 x i32> %rsrc, i32 %soffset) { 1266 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095 1267 ; GFX908: bb.0 (%ir-block.0): 1268 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1269 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1270 ; GFX908-NEXT: {{ $}} 1271 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1272 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1273 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1274 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1275 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1276 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1277 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1278 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1279 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1280 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1281 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1282 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1283 ; GFX908-NEXT: {{ $}} 1284 ; GFX908-NEXT: bb.1: 1285 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1286 ; GFX908-NEXT: {{ $}} 1287 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1288 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1289 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1290 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1291 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1292 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1293 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1294 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1295 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1296 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1297 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1298 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1299 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1300 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1301 ; GFX908-NEXT: {{ $}} 1302 ; GFX908-NEXT: bb.2: 1303 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1304 ; GFX908-NEXT: {{ $}} 1305 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1306 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1307 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1308 ; GFX908-NEXT: {{ $}} 1309 ; GFX908-NEXT: bb.3: 1310 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1311 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] 1312 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1313 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) 1314 ret float %val 1315} 1316 1317define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(<4 x i32> %rsrc, i32 %soffset) { 1318 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096 1319 ; GFX908: bb.0 (%ir-block.0): 1320 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1321 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1322 ; GFX908-NEXT: {{ $}} 1323 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1324 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1325 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1326 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1327 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1328 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1329 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1330 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1331 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1332 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1333 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec 1334 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1335 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1336 ; GFX908-NEXT: {{ $}} 1337 ; GFX908-NEXT: bb.1: 1338 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1339 ; GFX908-NEXT: {{ $}} 1340 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1341 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1342 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1343 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1344 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1345 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1346 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1347 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1348 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1349 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1350 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1351 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1352 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1353 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1354 ; GFX908-NEXT: {{ $}} 1355 ; GFX908-NEXT: bb.2: 1356 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1357 ; GFX908-NEXT: {{ $}} 1358 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1359 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1360 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1361 ; GFX908-NEXT: {{ $}} 1362 ; GFX908-NEXT: bb.3: 1363 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1364 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1365 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1366 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) 1367 ret float %val 1368} 1369 1370define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(<4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 1371 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16 1372 ; GFX908: bb.0 (%ir-block.0): 1373 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1374 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1375 ; GFX908-NEXT: {{ $}} 1376 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1377 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1378 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1379 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1380 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1381 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1382 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1383 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1384 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1385 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1386 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1387 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1388 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1389 ; GFX908-NEXT: {{ $}} 1390 ; GFX908-NEXT: bb.1: 1391 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1392 ; GFX908-NEXT: {{ $}} 1393 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1394 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1395 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1396 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1397 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1398 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1399 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1400 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1401 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1402 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1403 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1404 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1405 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1406 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1407 ; GFX908-NEXT: {{ $}} 1408 ; GFX908-NEXT: bb.2: 1409 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1410 ; GFX908-NEXT: {{ $}} 1411 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 16, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1412 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1413 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1414 ; GFX908-NEXT: {{ $}} 1415 ; GFX908-NEXT: bb.3: 1416 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1417 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1418 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1419 %voffset = add i32 %voffset.base, 16 1420 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1421 ret float %val 1422} 1423 1424define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(<4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 1425 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 1426 ; GFX908: bb.0 (%ir-block.0): 1427 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1428 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1429 ; GFX908-NEXT: {{ $}} 1430 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1431 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1432 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1433 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1434 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1435 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1436 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1437 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1438 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1439 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1440 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1441 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1442 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1443 ; GFX908-NEXT: {{ $}} 1444 ; GFX908-NEXT: bb.1: 1445 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1446 ; GFX908-NEXT: {{ $}} 1447 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1448 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1449 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1450 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1451 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1452 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1453 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1454 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1455 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1456 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1457 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1458 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1459 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1460 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1461 ; GFX908-NEXT: {{ $}} 1462 ; GFX908-NEXT: bb.2: 1463 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1464 ; GFX908-NEXT: {{ $}} 1465 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1466 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1467 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1468 ; GFX908-NEXT: {{ $}} 1469 ; GFX908-NEXT: bb.3: 1470 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1471 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1472 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1473 %voffset = add i32 %voffset.base, 4095 1474 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1475 ret float %val 1476} 1477 1478define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(<4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 1479 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096 1480 ; GFX908: bb.0 (%ir-block.0): 1481 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1482 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1483 ; GFX908-NEXT: {{ $}} 1484 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1485 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1486 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1487 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1488 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1489 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1490 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1491 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1492 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1493 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1494 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1495 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1496 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 1497 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1498 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1499 ; GFX908-NEXT: {{ $}} 1500 ; GFX908-NEXT: bb.1: 1501 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1502 ; GFX908-NEXT: {{ $}} 1503 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1504 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1505 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1506 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1507 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1508 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1509 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1510 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1511 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1512 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1513 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1514 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1515 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1516 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1517 ; GFX908-NEXT: {{ $}} 1518 ; GFX908-NEXT: bb.2: 1519 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1520 ; GFX908-NEXT: {{ $}} 1521 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1522 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1523 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1524 ; GFX908-NEXT: {{ $}} 1525 ; GFX908-NEXT: bb.3: 1526 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1527 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1528 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1529 %voffset = add i32 %voffset.base, 4096 1530 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1531 ret float %val 1532} 1533 1534define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(<4 x i32> %rsrc, i32 %voffset) { 1535 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095 1536 ; GFX908: bb.0 (%ir-block.0): 1537 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1538 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1539 ; GFX908-NEXT: {{ $}} 1540 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1541 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1542 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1543 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1544 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1545 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1546 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1547 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1548 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1549 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1550 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 1551 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1552 ; GFX908-NEXT: {{ $}} 1553 ; GFX908-NEXT: bb.1: 1554 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1555 ; GFX908-NEXT: {{ $}} 1556 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1557 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1558 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1559 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1560 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1561 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1562 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1563 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1564 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1565 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1566 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1567 ; GFX908-NEXT: {{ $}} 1568 ; GFX908-NEXT: bb.2: 1569 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1570 ; GFX908-NEXT: {{ $}} 1571 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1572 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1573 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1574 ; GFX908-NEXT: {{ $}} 1575 ; GFX908-NEXT: bb.3: 1576 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1577 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1578 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1579 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) 1580 ret float %val 1581} 1582 1583define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(<4 x i32> %rsrc, i32 %voffset) { 1584 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096 1585 ; GFX908: bb.0 (%ir-block.0): 1586 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1587 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 1588 ; GFX908-NEXT: {{ $}} 1589 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1590 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1591 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1592 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1593 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1594 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1595 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1596 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1597 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1598 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1599 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1600 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1601 ; GFX908-NEXT: {{ $}} 1602 ; GFX908-NEXT: bb.1: 1603 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1604 ; GFX908-NEXT: {{ $}} 1605 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1606 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1607 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1608 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1609 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1610 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1611 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1612 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1613 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1614 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1615 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1616 ; GFX908-NEXT: {{ $}} 1617 ; GFX908-NEXT: bb.2: 1618 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1619 ; GFX908-NEXT: {{ $}} 1620 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1621 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1622 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1623 ; GFX908-NEXT: {{ $}} 1624 ; GFX908-NEXT: bb.3: 1625 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1626 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1627 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1628 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) 1629 ret float %val 1630} 1631 1632define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1633 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16 1634 ; GFX908: bb.0 (%ir-block.0): 1635 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1636 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1637 ; GFX908-NEXT: {{ $}} 1638 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1639 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1640 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1641 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1642 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1643 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1644 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1645 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1646 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1647 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1648 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1649 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 1650 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1651 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1652 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1653 ; GFX908-NEXT: {{ $}} 1654 ; GFX908-NEXT: bb.1: 1655 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1656 ; GFX908-NEXT: {{ $}} 1657 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1658 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1659 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1660 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1661 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1662 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1663 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1664 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1665 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1666 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1667 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1668 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1669 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1670 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1671 ; GFX908-NEXT: {{ $}} 1672 ; GFX908-NEXT: bb.2: 1673 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1674 ; GFX908-NEXT: {{ $}} 1675 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1676 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1677 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1678 ; GFX908-NEXT: {{ $}} 1679 ; GFX908-NEXT: bb.3: 1680 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1681 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1682 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1683 %soffset = add i32 %soffset.base, 16 1684 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1685 ret float %val 1686} 1687 1688define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(<4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1689 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095 1690 ; GFX908: bb.0 (%ir-block.0): 1691 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1692 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1693 ; GFX908-NEXT: {{ $}} 1694 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1695 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1696 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1697 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1698 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1699 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1700 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1701 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1702 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1703 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1704 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1705 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 1706 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1707 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1708 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1709 ; GFX908-NEXT: {{ $}} 1710 ; GFX908-NEXT: bb.1: 1711 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1712 ; GFX908-NEXT: {{ $}} 1713 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1714 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1715 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1716 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1717 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1718 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1719 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1720 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1721 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1722 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1723 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1724 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1725 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1726 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1727 ; GFX908-NEXT: {{ $}} 1728 ; GFX908-NEXT: bb.2: 1729 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1730 ; GFX908-NEXT: {{ $}} 1731 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1732 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1733 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1734 ; GFX908-NEXT: {{ $}} 1735 ; GFX908-NEXT: bb.3: 1736 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1737 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1738 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1739 %soffset = add i32 %soffset.base, 4095 1740 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1741 ret float %val 1742} 1743 1744define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(<4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1745 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096 1746 ; GFX908: bb.0 (%ir-block.0): 1747 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1748 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1749 ; GFX908-NEXT: {{ $}} 1750 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1751 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1752 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1753 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1754 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1755 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1756 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1757 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1758 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1759 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1760 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1761 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1762 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1763 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1764 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1765 ; GFX908-NEXT: {{ $}} 1766 ; GFX908-NEXT: bb.1: 1767 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1768 ; GFX908-NEXT: {{ $}} 1769 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1770 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1771 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1772 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1773 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1774 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1775 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1776 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1777 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1778 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1779 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1780 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1781 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1782 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1783 ; GFX908-NEXT: {{ $}} 1784 ; GFX908-NEXT: bb.2: 1785 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1786 ; GFX908-NEXT: {{ $}} 1787 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1788 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1789 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1790 ; GFX908-NEXT: {{ $}} 1791 ; GFX908-NEXT: bb.3: 1792 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1793 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1794 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1795 %soffset = add i32 %soffset.base, 4096 1796 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1797 ret float %val 1798} 1799 1800; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 1801define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(<4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1802 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000 1803 ; GFX908: bb.0 (%ir-block.0): 1804 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1805 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1806 ; GFX908-NEXT: {{ $}} 1807 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1808 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1809 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1810 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1811 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1812 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1813 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1814 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1815 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1816 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1817 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1818 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 1819 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1820 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1821 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1822 ; GFX908-NEXT: {{ $}} 1823 ; GFX908-NEXT: bb.1: 1824 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1825 ; GFX908-NEXT: {{ $}} 1826 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1827 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1828 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1829 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1830 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1831 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1832 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1833 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1834 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1835 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1836 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1837 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1838 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1839 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1840 ; GFX908-NEXT: {{ $}} 1841 ; GFX908-NEXT: bb.2: 1842 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1843 ; GFX908-NEXT: {{ $}} 1844 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1845 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1846 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1847 ; GFX908-NEXT: {{ $}} 1848 ; GFX908-NEXT: bb.3: 1849 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1850 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1851 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1852 %soffset = add i32 %soffset.base, 5000 1853 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1854 ret float %val 1855} 1856 1857; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 1858define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(<4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 1859 ; GFX908-LABEL: name: raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000 1860 ; GFX908: bb.0 (%ir-block.0): 1861 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1862 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1863 ; GFX908-NEXT: {{ $}} 1864 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1865 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1866 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1867 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1868 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1869 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1870 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1871 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1872 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1873 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1874 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1875 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1876 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 1877 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1878 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1879 ; GFX908-NEXT: {{ $}} 1880 ; GFX908-NEXT: bb.1: 1881 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1882 ; GFX908-NEXT: {{ $}} 1883 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1884 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1885 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1886 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1887 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1888 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1889 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1890 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1891 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1892 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1893 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1894 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1895 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1896 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1897 ; GFX908-NEXT: {{ $}} 1898 ; GFX908-NEXT: bb.2: 1899 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1900 ; GFX908-NEXT: {{ $}} 1901 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 904, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 1902 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1903 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1904 ; GFX908-NEXT: {{ $}} 1905 ; GFX908-NEXT: bb.3: 1906 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1907 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 1908 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 1909 %voffset = add i32 %voffset.base, 5000 1910 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 1911 ret float %val 1912} 1913 1914declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) 1915declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg) 1916declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg) 1917declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg) 1918 1919declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32 immarg) 1920declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg) 1921declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg) 1922declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg) 1923 1924declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32 immarg) 1925declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32 immarg) 1926