1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s 3 4;; Older intrinsics that take <4 x i32> 5 6define float @llvm_amdgcn_raw_buffer_load_f32(i32 %voffset, i32 %soffset) { 7 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_f32 8 ; GFX908: bb.0 (%ir-block.0): 9 ; GFX908-NEXT: successors: %bb.1(0x80000000) 10 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 11 ; GFX908-NEXT: {{ $}} 12 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 13 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 14 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 15 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 16 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 17 ; GFX908-NEXT: {{ $}} 18 ; GFX908-NEXT: bb.1: 19 ; GFX908-NEXT: successors: %bb.2(0x80000000) 20 ; GFX908-NEXT: {{ $}} 21 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 22 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 23 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 24 ; GFX908-NEXT: {{ $}} 25 ; GFX908-NEXT: bb.2: 26 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 27 ; GFX908-NEXT: {{ $}} 28 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 29 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 30 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 31 ; GFX908-NEXT: {{ $}} 32 ; GFX908-NEXT: bb.3: 33 ; GFX908-NEXT: $exec = S_MOV_B64 
[[S_MOV_B64_]] 34 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 35 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 36 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 37 ret float %val 38} 39 40define float @llvm_amdgcn_raw_tbuffer_load_f32(i32 %voffset, i32 %soffset) { 41 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_f32 42 ; GFX908: bb.0 (%ir-block.0): 43 ; GFX908-NEXT: successors: %bb.1(0x80000000) 44 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 45 ; GFX908-NEXT: {{ $}} 46 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 47 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 48 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 49 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 50 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 51 ; GFX908-NEXT: {{ $}} 52 ; GFX908-NEXT: bb.1: 53 ; GFX908-NEXT: successors: %bb.2(0x80000000) 54 ; GFX908-NEXT: {{ $}} 55 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 56 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 57 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 58 ; GFX908-NEXT: {{ $}} 59 ; GFX908-NEXT: bb.2: 60 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 61 ; GFX908-NEXT: {{ $}} 62 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 63 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 64 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 65 ; GFX908-NEXT: {{ $}} 66 ; GFX908-NEXT: bb.3: 67 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 68 ; 
GFX908-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 69 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 70 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 71 ret float %val 72} 73 74define <2 x float> @llvm_amdgcn_raw_buffer_load_v2f32(i32 %voffset, i32 %soffset) { 75 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v2f32 76 ; GFX908: bb.0 (%ir-block.0): 77 ; GFX908-NEXT: successors: %bb.1(0x80000000) 78 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 79 ; GFX908-NEXT: {{ $}} 80 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 81 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 82 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 83 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 84 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 85 ; GFX908-NEXT: {{ $}} 86 ; GFX908-NEXT: bb.1: 87 ; GFX908-NEXT: successors: %bb.2(0x80000000) 88 ; GFX908-NEXT: {{ $}} 89 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 90 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 91 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 92 ; GFX908-NEXT: {{ $}} 93 ; GFX908-NEXT: bb.2: 94 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 95 ; GFX908-NEXT: {{ $}} 96 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8) 97 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 98 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 99 ; GFX908-NEXT: {{ $}} 100 ; GFX908-NEXT: bb.3: 101 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 102 ; 
GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 103 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 104 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 105 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 106 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 107 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 108 ret <2 x float> %val 109} 110 111define <2 x float> @llvm_amdgcn_raw_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) { 112 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v2f32 113 ; GFX908: bb.0 (%ir-block.0): 114 ; GFX908-NEXT: successors: %bb.1(0x80000000) 115 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 116 ; GFX908-NEXT: {{ $}} 117 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 118 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 119 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 120 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 121 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 122 ; GFX908-NEXT: {{ $}} 123 ; GFX908-NEXT: bb.1: 124 ; GFX908-NEXT: successors: %bb.2(0x80000000) 125 ; GFX908-NEXT: {{ $}} 126 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 127 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 128 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 129 ; GFX908-NEXT: {{ $}} 130 ; GFX908-NEXT: bb.2: 131 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 132 ; GFX908-NEXT: {{ $}} 133 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8) 134 ; 
GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 135 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 136 ; GFX908-NEXT: {{ $}} 137 ; GFX908-NEXT: bb.3: 138 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 139 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 140 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 141 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 142 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 143 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 144 %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 145 ret <2 x float> %val 146} 147 148define <3 x float> @llvm_amdgcn_raw_buffer_load_v3f32(i32 %voffset, i32 %soffset) { 149 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v3f32 150 ; GFX908: bb.0 (%ir-block.0): 151 ; GFX908-NEXT: successors: %bb.1(0x80000000) 152 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 153 ; GFX908-NEXT: {{ $}} 154 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 155 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 156 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 157 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 158 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 159 ; GFX908-NEXT: {{ $}} 160 ; GFX908-NEXT: bb.1: 161 ; GFX908-NEXT: successors: %bb.2(0x80000000) 162 ; GFX908-NEXT: {{ $}} 163 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 164 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 165 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 166 ; GFX908-NEXT: {{ $}} 167 ; GFX908-NEXT: bb.2: 168 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 
169 ; GFX908-NEXT: {{ $}} 170 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8) 171 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 172 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 173 ; GFX908-NEXT: {{ $}} 174 ; GFX908-NEXT: bb.3: 175 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 176 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 177 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 178 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 179 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 180 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 181 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 182 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 183 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 184 ret <3 x float> %val 185} 186 187define <3 x float> @llvm_amdgcn_raw_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) { 188 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v3f32 189 ; GFX908: bb.0 (%ir-block.0): 190 ; GFX908-NEXT: successors: %bb.1(0x80000000) 191 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 192 ; GFX908-NEXT: {{ $}} 193 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 194 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 195 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 196 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 197 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 198 ; GFX908-NEXT: {{ $}} 199 ; GFX908-NEXT: bb.1: 200 ; GFX908-NEXT: successors: %bb.2(0x80000000) 201 ; GFX908-NEXT: {{ $}} 202 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 203 ; GFX908-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 204 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 205 ; GFX908-NEXT: {{ $}} 206 ; GFX908-NEXT: bb.2: 207 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 208 ; GFX908-NEXT: {{ $}} 209 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8) 210 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 211 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 212 ; GFX908-NEXT: {{ $}} 213 ; GFX908-NEXT: bb.3: 214 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 215 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 216 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 217 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 218 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 219 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 220 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 221 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 222 %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 223 ret <3 x float> %val 224} 225 226define <4 x float> @llvm_amdgcn_raw_buffer_load_v4f32(i32 %voffset, i32 %soffset) { 227 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v4f32 228 ; GFX908: bb.0 (%ir-block.0): 229 ; GFX908-NEXT: successors: %bb.1(0x80000000) 230 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 231 ; GFX908-NEXT: {{ $}} 232 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 233 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 234 ; GFX908-NEXT: 
[[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 235 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 236 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 237 ; GFX908-NEXT: {{ $}} 238 ; GFX908-NEXT: bb.1: 239 ; GFX908-NEXT: successors: %bb.2(0x80000000) 240 ; GFX908-NEXT: {{ $}} 241 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 242 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 243 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 244 ; GFX908-NEXT: {{ $}} 245 ; GFX908-NEXT: bb.2: 246 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 247 ; GFX908-NEXT: {{ $}} 248 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) 249 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 250 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 251 ; GFX908-NEXT: {{ $}} 252 ; GFX908-NEXT: bb.3: 253 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 254 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 255 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 256 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 257 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 258 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 259 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 260 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 261 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]] 262 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 263 %val = call <4 x float> 
@llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 264 ret <4 x float> %val 265} 266 267define <4 x float> @llvm_amdgcn_raw_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) { 268 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v4f32 269 ; GFX908: bb.0 (%ir-block.0): 270 ; GFX908-NEXT: successors: %bb.1(0x80000000) 271 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 272 ; GFX908-NEXT: {{ $}} 273 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 274 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 275 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 276 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 277 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 278 ; GFX908-NEXT: {{ $}} 279 ; GFX908-NEXT: bb.1: 280 ; GFX908-NEXT: successors: %bb.2(0x80000000) 281 ; GFX908-NEXT: {{ $}} 282 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 283 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 284 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 285 ; GFX908-NEXT: {{ $}} 286 ; GFX908-NEXT: bb.2: 287 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 288 ; GFX908-NEXT: {{ $}} 289 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) 290 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 291 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 292 ; GFX908-NEXT: {{ $}} 293 ; GFX908-NEXT: bb.3: 294 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 295 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 
296 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 297 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 298 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 299 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 300 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 301 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 302 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]] 303 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 304 %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 305 ret <4 x float> %val 306} 307 308define void @llvm_amdgcn_raw_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) { 309 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_f32 310 ; GFX908: bb.0 (%ir-block.0): 311 ; GFX908-NEXT: successors: %bb.1(0x80000000) 312 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 313 ; GFX908-NEXT: {{ $}} 314 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 315 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 316 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 317 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 318 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 319 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 320 ; GFX908-NEXT: {{ $}} 321 ; GFX908-NEXT: bb.1: 322 ; GFX908-NEXT: successors: %bb.2(0x80000000) 323 ; GFX908-NEXT: {{ $}} 324 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 325 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 326 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 327 ; GFX908-NEXT: {{ $}} 328 ; GFX908-NEXT: bb.2: 329 ; GFX908-NEXT: successors: 
%bb.1(0x40000000), %bb.3(0x40000000) 330 ; GFX908-NEXT: {{ $}} 331 ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 332 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 333 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 334 ; GFX908-NEXT: {{ $}} 335 ; GFX908-NEXT: bb.3: 336 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 337 ; GFX908-NEXT: SI_RETURN 338 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 339 ret void 340} 341 342define void @llvm_amdgcn_raw_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) { 343 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_f32 344 ; GFX908: bb.0 (%ir-block.0): 345 ; GFX908-NEXT: successors: %bb.1(0x80000000) 346 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 347 ; GFX908-NEXT: {{ $}} 348 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 349 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 350 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 351 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 352 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 353 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 354 ; GFX908-NEXT: {{ $}} 355 ; GFX908-NEXT: bb.1: 356 ; GFX908-NEXT: successors: %bb.2(0x80000000) 357 ; GFX908-NEXT: {{ $}} 358 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 359 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 360 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 361 ; GFX908-NEXT: {{ $}} 362 ; GFX908-NEXT: bb.2: 363 ; GFX908-NEXT: successors: %bb.1(0x40000000), 
%bb.3(0x40000000) 364 ; GFX908-NEXT: {{ $}} 365 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 366 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 367 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 368 ; GFX908-NEXT: {{ $}} 369 ; GFX908-NEXT: bb.3: 370 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 371 ; GFX908-NEXT: SI_RETURN 372 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 373 ret void 374} 375 376define void @llvm_amdgcn_raw_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) { 377 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v2f32 378 ; GFX908: bb.0 (%ir-block.0): 379 ; GFX908-NEXT: successors: %bb.1(0x80000000) 380 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 381 ; GFX908-NEXT: {{ $}} 382 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 383 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 384 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 385 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 386 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 387 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 388 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 389 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] 390 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 391 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 392 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 393 ; GFX908-NEXT: {{ $}} 394 ; GFX908-NEXT: bb.1: 395 ; GFX908-NEXT: successors: %bb.2(0x80000000) 396 ; GFX908-NEXT: {{ $}} 397 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 398 ; GFX908-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 399 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 400 ; GFX908-NEXT: {{ $}} 401 ; GFX908-NEXT: bb.2: 402 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 403 ; GFX908-NEXT: {{ $}} 404 ; GFX908-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8) 405 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 406 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 407 ; GFX908-NEXT: {{ $}} 408 ; GFX908-NEXT: bb.3: 409 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 410 ; GFX908-NEXT: SI_RETURN 411 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 412 ret void 413} 414 415define void @llvm_amdgcn_raw_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) { 416 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v2f32 417 ; GFX908: bb.0 (%ir-block.0): 418 ; GFX908-NEXT: successors: %bb.1(0x80000000) 419 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 420 ; GFX908-NEXT: {{ $}} 421 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 422 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 423 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 424 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 425 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 426 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 427 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 428 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] 429 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 430 ; GFX908-NEXT: 
[[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 431 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 432 ; GFX908-NEXT: {{ $}} 433 ; GFX908-NEXT: bb.1: 434 ; GFX908-NEXT: successors: %bb.2(0x80000000) 435 ; GFX908-NEXT: {{ $}} 436 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 437 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 438 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 439 ; GFX908-NEXT: {{ $}} 440 ; GFX908-NEXT: bb.2: 441 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 442 ; GFX908-NEXT: {{ $}} 443 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8) 444 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 445 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 446 ; GFX908-NEXT: {{ $}} 447 ; GFX908-NEXT: bb.3: 448 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 449 ; GFX908-NEXT: SI_RETURN 450 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 451 ret void 452} 453 454define void @llvm_amdgcn_raw_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) { 455 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v3f32 456 ; GFX908: bb.0 (%ir-block.0): 457 ; GFX908-NEXT: successors: %bb.1(0x80000000) 458 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 459 ; GFX908-NEXT: {{ $}} 460 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 461 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 462 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 463 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 464 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 465 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 466 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 467 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 468 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2 469 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]] 470 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 471 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 472 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 473 ; GFX908-NEXT: {{ $}} 474 ; GFX908-NEXT: bb.1: 475 ; GFX908-NEXT: successors: %bb.2(0x80000000) 476 ; GFX908-NEXT: {{ $}} 477 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 478 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 479 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 480 ; GFX908-NEXT: {{ $}} 481 ; GFX908-NEXT: bb.2: 482 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 483 ; GFX908-NEXT: {{ $}} 484 ; GFX908-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8) 485 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 486 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 487 ; GFX908-NEXT: {{ $}} 488 ; GFX908-NEXT: bb.3: 489 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 490 ; GFX908-NEXT: SI_RETURN 491 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 492 ret void 493} 494 495define void 
@llvm_amdgcn_raw_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) { 496 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v3f32 497 ; GFX908: bb.0 (%ir-block.0): 498 ; GFX908-NEXT: successors: %bb.1(0x80000000) 499 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 500 ; GFX908-NEXT: {{ $}} 501 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 502 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 503 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 504 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 505 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 506 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 507 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 508 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 509 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2 510 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]] 511 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 512 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 513 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 514 ; GFX908-NEXT: {{ $}} 515 ; GFX908-NEXT: bb.1: 516 ; GFX908-NEXT: successors: %bb.2(0x80000000) 517 ; GFX908-NEXT: {{ $}} 518 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 519 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 520 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 521 ; GFX908-NEXT: {{ $}} 522 ; GFX908-NEXT: bb.2: 523 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 524 ; GFX908-NEXT: {{ $}} 525 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 
0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8) 526 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 527 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 528 ; GFX908-NEXT: {{ $}} 529 ; GFX908-NEXT: bb.3: 530 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 531 ; GFX908-NEXT: SI_RETURN 532 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 533 ret void 534} 535 536define void @llvm_amdgcn_raw_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) { 537 ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v4f32 538 ; GFX908: bb.0 (%ir-block.0): 539 ; GFX908-NEXT: successors: %bb.1(0x80000000) 540 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 541 ; GFX908-NEXT: {{ $}} 542 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 543 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 544 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 545 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 546 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 547 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 548 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 549 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 550 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 551 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 552 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 553 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] 554 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 555 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 556 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 557 ; GFX908-NEXT: {{ $}} 558 ; GFX908-NEXT: bb.1: 559 ; GFX908-NEXT: successors: %bb.2(0x80000000) 560 ; GFX908-NEXT: {{ 
$}} 561 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 562 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 563 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 564 ; GFX908-NEXT: {{ $}} 565 ; GFX908-NEXT: bb.2: 566 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 567 ; GFX908-NEXT: {{ $}} 568 ; GFX908-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) 569 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 570 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 571 ; GFX908-NEXT: {{ $}} 572 ; GFX908-NEXT: bb.3: 573 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 574 ; GFX908-NEXT: SI_RETURN 575 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0) 576 ret void 577} 578 579define void @llvm_amdgcn_raw_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) { 580 ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v4f32 581 ; GFX908: bb.0 (%ir-block.0): 582 ; GFX908-NEXT: successors: %bb.1(0x80000000) 583 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 584 ; GFX908-NEXT: {{ $}} 585 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 586 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 587 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 588 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 589 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 590 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 591 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 592 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 
593 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 594 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 595 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 596 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] 597 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 598 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 599 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 600 ; GFX908-NEXT: {{ $}} 601 ; GFX908-NEXT: bb.1: 602 ; GFX908-NEXT: successors: %bb.2(0x80000000) 603 ; GFX908-NEXT: {{ $}} 604 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 605 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 606 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 607 ; GFX908-NEXT: {{ $}} 608 ; GFX908-NEXT: bb.2: 609 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 610 ; GFX908-NEXT: {{ $}} 611 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) 612 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 613 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 614 ; GFX908-NEXT: {{ $}} 615 ; GFX908-NEXT: bb.3: 616 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 617 ; GFX908-NEXT: SI_RETURN 618 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 619 ret void 620} 621 622;; Newer intrinsics that take addrspace(8) pointers 623 624define float @llvm_amdgcn_raw_ptr_buffer_load_f32(i32
%soffset) { 625 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_f32 626 ; GFX908: bb.0 (%ir-block.0): 627 ; GFX908-NEXT: successors: %bb.1(0x80000000) 628 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 629 ; GFX908-NEXT: {{ $}} 630 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 631 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 632 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 633 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 634 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 635 ; GFX908-NEXT: {{ $}} 636 ; GFX908-NEXT: bb.1: 637 ; GFX908-NEXT: successors: %bb.2(0x80000000) 638 ; GFX908-NEXT: {{ $}} 639 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 640 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 641 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 642 ; GFX908-NEXT: {{ $}} 643 ; GFX908-NEXT: bb.2: 644 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 645 ; GFX908-NEXT: {{ $}} 646 ; GFX908-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8) 647 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 648 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 649 ; GFX908-NEXT: {{ $}} 650 ; GFX908-NEXT: bb.3: 651 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 652 ; GFX908-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] 653 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 654 %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0) 655 ret float %val 656} 657 658define float 
@llvm_amdgcn_raw_ptr_tbuffer_load_f32(i32 %voffset, i32 %soffset) { 659 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_f32 660 ; GFX908: bb.0 (%ir-block.0): 661 ; GFX908-NEXT: successors: %bb.1(0x80000000) 662 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 663 ; GFX908-NEXT: {{ $}} 664 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 665 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 666 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 667 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 668 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 669 ; GFX908-NEXT: {{ $}} 670 ; GFX908-NEXT: bb.1: 671 ; GFX908-NEXT: successors: %bb.2(0x80000000) 672 ; GFX908-NEXT: {{ $}} 673 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 674 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 675 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 676 ; GFX908-NEXT: {{ $}} 677 ; GFX908-NEXT: bb.2: 678 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 679 ; GFX908-NEXT: {{ $}} 680 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8) 681 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 682 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 683 ; GFX908-NEXT: {{ $}} 684 ; GFX908-NEXT: bb.3: 685 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 686 ; GFX908-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 687 ; GFX908-NEXT: SI_RETURN implicit $vgpr0 688 %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) poison, i32 %voffset, 
i32 %soffset, i32 0, i32 0) 689 ret float %val 690} 691 692define <2 x float> @llvm_amdgcn_raw_ptr_buffer_load_v2f32(i32 %voffset, i32 %soffset) { 693 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v2f32 694 ; GFX908: bb.0 (%ir-block.0): 695 ; GFX908-NEXT: successors: %bb.1(0x80000000) 696 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 697 ; GFX908-NEXT: {{ $}} 698 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 699 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 700 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 701 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 702 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 703 ; GFX908-NEXT: {{ $}} 704 ; GFX908-NEXT: bb.1: 705 ; GFX908-NEXT: successors: %bb.2(0x80000000) 706 ; GFX908-NEXT: {{ $}} 707 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 708 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 709 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 710 ; GFX908-NEXT: {{ $}} 711 ; GFX908-NEXT: bb.2: 712 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 713 ; GFX908-NEXT: {{ $}} 714 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8) 715 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 716 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 717 ; GFX908-NEXT: {{ $}} 718 ; GFX908-NEXT: bb.3: 719 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 720 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 721 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = 
COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 722 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 723 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 724 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 725 %val = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0) 726 ret <2 x float> %val 727} 728 729define <2 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) { 730 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v2f32 731 ; GFX908: bb.0 (%ir-block.0): 732 ; GFX908-NEXT: successors: %bb.1(0x80000000) 733 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 734 ; GFX908-NEXT: {{ $}} 735 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 736 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 737 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 738 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 739 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 740 ; GFX908-NEXT: {{ $}} 741 ; GFX908-NEXT: bb.1: 742 ; GFX908-NEXT: successors: %bb.2(0x80000000) 743 ; GFX908-NEXT: {{ $}} 744 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 745 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 746 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 747 ; GFX908-NEXT: {{ $}} 748 ; GFX908-NEXT: bb.2: 749 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 750 ; GFX908-NEXT: {{ $}} 751 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8) 752 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc 753 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 754 ; GFX908-NEXT: {{ $}} 755 ; GFX908-NEXT: bb.3: 756 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 757 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 758 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 759 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 760 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 761 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 762 %val = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 763 ret <2 x float> %val 764} 765 766define <3 x float> @llvm_amdgcn_raw_ptr_buffer_load_v3f32(i32 %voffset, i32 %soffset) { 767 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v3f32 768 ; GFX908: bb.0 (%ir-block.0): 769 ; GFX908-NEXT: successors: %bb.1(0x80000000) 770 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 771 ; GFX908-NEXT: {{ $}} 772 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 773 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 774 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 775 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 776 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 777 ; GFX908-NEXT: {{ $}} 778 ; GFX908-NEXT: bb.1: 779 ; GFX908-NEXT: successors: %bb.2(0x80000000) 780 ; GFX908-NEXT: {{ $}} 781 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 782 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 783 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 784 ; GFX908-NEXT: {{ $}} 785 ; GFX908-NEXT: bb.2: 786 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 787 ; GFX908-NEXT: {{ $}} 788 ; GFX908-NEXT: 
[[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8) 789 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 790 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 791 ; GFX908-NEXT: {{ $}} 792 ; GFX908-NEXT: bb.3: 793 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 794 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 795 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 796 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 797 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 798 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 799 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 800 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 801 %val = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0) 802 ret <3 x float> %val 803} 804 805define <3 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) { 806 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v3f32 807 ; GFX908: bb.0 (%ir-block.0): 808 ; GFX908-NEXT: successors: %bb.1(0x80000000) 809 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 810 ; GFX908-NEXT: {{ $}} 811 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 812 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 813 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 814 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 815 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 816 ; GFX908-NEXT: {{ $}} 817 ; GFX908-NEXT: bb.1: 818 ; GFX908-NEXT: successors: %bb.2(0x80000000) 819 ; GFX908-NEXT: {{ $}} 820 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 821 ; GFX908-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 822 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 823 ; GFX908-NEXT: {{ $}} 824 ; GFX908-NEXT: bb.2: 825 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 826 ; GFX908-NEXT: {{ $}} 827 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8) 828 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 829 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 830 ; GFX908-NEXT: {{ $}} 831 ; GFX908-NEXT: bb.3: 832 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 833 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 834 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 835 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 836 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 837 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 838 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 839 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 840 %val = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 841 ret <3 x float> %val 842} 843 844define <4 x float> @llvm_amdgcn_raw_ptr_buffer_load_v4f32(i32 %voffset, i32 %soffset) { 845 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v4f32 846 ; GFX908: bb.0 (%ir-block.0): 847 ; GFX908-NEXT: successors: %bb.1(0x80000000) 848 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 849 ; GFX908-NEXT: {{ $}} 850 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 851 ; GFX908-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 852 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 853 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 854 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 855 ; GFX908-NEXT: {{ $}} 856 ; GFX908-NEXT: bb.1: 857 ; GFX908-NEXT: successors: %bb.2(0x80000000) 858 ; GFX908-NEXT: {{ $}} 859 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 860 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 861 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 862 ; GFX908-NEXT: {{ $}} 863 ; GFX908-NEXT: bb.2: 864 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 865 ; GFX908-NEXT: {{ $}} 866 ; GFX908-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8) 867 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 868 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 869 ; GFX908-NEXT: {{ $}} 870 ; GFX908-NEXT: bb.3: 871 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 872 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 873 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 874 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 875 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 876 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 877 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 878 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 879 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]] 880 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 881 %val = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0) 882 ret <4 x float> %val 883} 884 885define <4 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) { 886 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v4f32 887 ; GFX908: bb.0 (%ir-block.0): 888 ; GFX908-NEXT: successors: %bb.1(0x80000000) 889 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1 890 ; GFX908-NEXT: {{ $}} 891 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 892 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 893 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 894 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 895 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 896 ; GFX908-NEXT: {{ $}} 897 ; GFX908-NEXT: bb.1: 898 ; GFX908-NEXT: successors: %bb.2(0x80000000) 899 ; GFX908-NEXT: {{ $}} 900 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 901 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 902 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec 903 ; GFX908-NEXT: {{ $}} 904 ; GFX908-NEXT: bb.2: 905 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 906 ; GFX908-NEXT: {{ $}} 907 ; GFX908-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8) 908 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 909 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 910 ; GFX908-NEXT: {{ $}} 911 ; GFX908-NEXT: bb.3: 912 ; GFX908-NEXT: $exec = 
S_MOV_B64 [[S_MOV_B64_]] 913 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 914 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 915 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 916 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 917 ; GFX908-NEXT: $vgpr0 = COPY [[COPY2]] 918 ; GFX908-NEXT: $vgpr1 = COPY [[COPY3]] 919 ; GFX908-NEXT: $vgpr2 = COPY [[COPY4]] 920 ; GFX908-NEXT: $vgpr3 = COPY [[COPY5]] 921 ; GFX908-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 922 %val = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0) 923 ret <4 x float> %val 924} 925 926define void @llvm_amdgcn_raw_ptr_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) { 927 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_f32 928 ; GFX908: bb.0 (%ir-block.0): 929 ; GFX908-NEXT: successors: %bb.1(0x80000000) 930 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 931 ; GFX908-NEXT: {{ $}} 932 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 933 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 934 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 935 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 936 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 937 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 938 ; GFX908-NEXT: {{ $}} 939 ; GFX908-NEXT: bb.1: 940 ; GFX908-NEXT: successors: %bb.2(0x80000000) 941 ; GFX908-NEXT: {{ $}} 942 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 943 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 944 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec 945 ; GFX908-NEXT: {{ $}} 946 ; GFX908-NEXT: bb.2: 947 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 948 ; GFX908-NEXT: {{ $}} 949 ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8) 950 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 951 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 952 ; GFX908-NEXT: {{ $}} 953 ; GFX908-NEXT: bb.3: 954 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 955 ; GFX908-NEXT: SI_RETURN 956 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0) 957 ret void 958} 959 960define void @llvm_amdgcn_raw_ptr_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) { 961 ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_f32 962 ; GFX908: bb.0 (%ir-block.0): 963 ; GFX908-NEXT: successors: %bb.1(0x80000000) 964 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 965 ; GFX908-NEXT: {{ $}} 966 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 967 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 968 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 969 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF 970 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 971 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 972 ; GFX908-NEXT: {{ $}} 973 ; GFX908-NEXT: bb.1: 974 ; GFX908-NEXT: successors: %bb.2(0x80000000) 975 ; GFX908-NEXT: {{ $}} 976 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 977 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec 978 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed 
[[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

;; <2 x float> store through llvm.amdgcn.raw.ptr.buffer.store.v2f32 with a
;; poison resource; %voffset and %soffset are ordinary function arguments.
;; The assertions expect the two value VGPRs to be combined with REG_SEQUENCE
;; into a vreg_64, and BUFFER_STORE_DWORDX2_OFFEN_exact to sit in a loop
;; (bb.1/bb.2) that applies V_READFIRSTLANE_B32 to the copy of $vgpr3 and is
;; terminated by SI_WATERFALL_LOOP; $exec is saved in bb.0 and restored in bb.3.
define void @llvm_amdgcn_raw_ptr_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v2f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

;; Typed-buffer counterpart of the <2 x float> store: calls
;; llvm.amdgcn.raw.ptr.tbuffer.store.v2f32 (one extra trailing i32 0 operand
;; compared with the buffer.store call above). The assertions expect
;; TBUFFER_STORE_FORMAT_XY_OFFEN_exact inside the same
;; V_READFIRSTLANE_B32 / SI_WATERFALL_LOOP loop over the copy of $vgpr3.
define void @llvm_amdgcn_raw_ptr_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v2f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

;; <3 x float> store through llvm.amdgcn.raw.ptr.buffer.store.v3f32 with a
;; poison resource. The assertions expect the three value VGPRs to be combined
;; into a vreg_96 and BUFFER_STORE_DWORDX3_OFFEN_exact to sit in a loop that
;; applies V_READFIRSTLANE_B32 to the copy of $vgpr4 and ends in
;; SI_WATERFALL_LOOP.
define void @llvm_amdgcn_raw_ptr_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v3f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

;; Typed-buffer counterpart of the <3 x float> store: calls
;; llvm.amdgcn.raw.ptr.tbuffer.store.v3f32. The assertions expect
;; TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact inside the same
;; V_READFIRSTLANE_B32 / SI_WATERFALL_LOOP loop over the copy of $vgpr4.
define void @llvm_amdgcn_raw_ptr_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v3f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

;; <4 x float> store through llvm.amdgcn.raw.ptr.buffer.store.v4f32 with a
;; poison resource. The assertions expect the four value VGPRs to be combined
;; into a vreg_128 and BUFFER_STORE_DWORDX4_OFFEN_exact to sit in a loop that
;; applies V_READFIRSTLANE_B32 to the copy of $vgpr5 and ends in
;; SI_WATERFALL_LOOP.
define void @llvm_amdgcn_raw_ptr_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v4f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

;; Typed-buffer counterpart of the <4 x float> store: calls
;; llvm.amdgcn.raw.ptr.tbuffer.store.v4f32. The assertions expect
;; TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact inside the same
;; V_READFIRSTLANE_B32 / SI_WATERFALL_LOOP loop over the copy of $vgpr5.
define void @llvm_amdgcn_raw_ptr_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v4f32
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

;; Declarations of the intrinsic forms that take the buffer resource as
;; <4 x i32>. The tbuffer variants carry one more i32 operand than the
;; corresponding buffer variants.
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 )
declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32)
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32)
declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32)
declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32)
declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32)
declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32)
declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32)
declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32)
declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32)

;; Declarations of the intrinsic forms that take the buffer resource as
;; ptr addrspace(8); the resource parameter is marked nocapture in each.
declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32 )
declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32)
declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32)
declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)