; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s

; Natural mapping
define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
  ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
  ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
  ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
  ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}
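
; Note on the checked MIR above: the tbuffer store needs its 128-bit rsrc and
; its soffset in SGPRs, but here they only arrive in VGPRs, so
; si-fix-sgpr-copies wraps the store in a waterfall loop (bb.1/bb.2). Each
; iteration reads the first active lane's rsrc dwords and soffset with
; V_READFIRSTLANE_B32, compares them against every lane, restricts $exec to
; the matching lanes with S_AND_SAVEEXEC_B64, issues the store, and branches
; back via SI_WATERFALL_LOOP until all lanes are handled; bb.3 then restores
; the $exec mask saved in bb.0. The same pattern repeats in the tests below.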

; Natural mapping
define void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
  ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.1:
  ; GFX908-NEXT: successors: %bb.2(0x80000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
  ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
  ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
  ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
  ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
  ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
  ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.2:
  ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
  ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: bb.3:
  ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; GFX908-NEXT: SI_RETURN
  call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}
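
; For reference (assuming the upstream definition of these intrinsics), the
; operands of @llvm.amdgcn.raw.tbuffer.store.* are (value, <4 x i32> rsrc,
; i32 voffset, i32 soffset, i32 immarg format, i32 immarg aux). The 78/94
; immediates select the tbuffer format, and the trailing aux values 0/1/2/3/4
; used later in this file exercise the cache-policy bits (none, glc, slc,
; glc|slc, dlc), which show up as the matching immediate on the
; TBUFFER_STORE_FORMAT_*_exact instructions in the checks.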

; Natural mapping
define void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
  ; GFX908-LABEL: name: raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX908: bb.0 (%ir-block.0):
  ; GFX908-NEXT: successors: %bb.1(0x80000000)
  ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
  ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ;
GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2 142 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]] 143 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 144 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 145 ; GFX908-NEXT: {{ $}} 146 ; GFX908-NEXT: bb.1: 147 ; GFX908-NEXT: successors: %bb.2(0x80000000) 148 ; GFX908-NEXT: {{ $}} 149 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 150 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 151 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 152 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 153 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 154 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 155 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 156 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 157 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 158 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 159 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 160 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 161 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 162 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 163 ; GFX908-NEXT: {{ $}} 164 ; GFX908-NEXT: bb.2: 165 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 166 ; GFX908-NEXT: {{ $}} 167 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY9]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8) 168 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 169 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 170 ; GFX908-NEXT: {{ $}} 171 ; GFX908-NEXT: bb.3: 172 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 173 ; GFX908-NEXT: SI_RETURN 174 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 175 ret void 176} 177 178; Natural mapping 179define void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 180 ; GFX908-LABEL: name: raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 181 ; GFX908: bb.0 (%ir-block.0): 182 ; GFX908-NEXT: successors: 
%bb.1(0x80000000) 183 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 184 ; GFX908-NEXT: {{ $}} 185 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr9 186 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 187 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr7 188 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr6 189 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr5 190 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 191 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 192 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr2 193 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr1 194 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr0 195 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 196 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 197 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 198 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 199 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 200 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 201 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 202 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 203 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 204 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 205 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]] 206 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 207 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 208 ; GFX908-NEXT: {{ $}} 209 ; GFX908-NEXT: bb.1: 210 ; GFX908-NEXT: successors: %bb.2(0x80000000) 211 ; GFX908-NEXT: {{ $}} 212 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 213 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 214 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 215 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 216 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 217 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 218 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 219 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 220 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 221 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 222 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 223 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 
224 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 225 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 226 ; GFX908-NEXT: {{ $}} 227 ; GFX908-NEXT: bb.2: 228 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 229 ; GFX908-NEXT: {{ $}} 230 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY10]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) 231 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 232 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 233 ; GFX908-NEXT: {{ $}} 234 ; GFX908-NEXT: bb.3: 235 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 236 ; GFX908-NEXT: SI_RETURN 237 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 238 ret void 239} 240 241; Copies for VGPR arguments 242define void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 243 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset 244 ; GFX908: bb.0 (%ir-block.0): 245 ; GFX908-NEXT: successors: %bb.1(0x80000000) 246 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 247 ; GFX908-NEXT: {{ $}} 248 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 249 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 250 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 251 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 252 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 253 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 254 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 255 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 256 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 257 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 258 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 259 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 260 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 261 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 262 ; GFX908-NEXT: {{ $}} 263 ; GFX908-NEXT: bb.1: 264 ; GFX908-NEXT: successors: %bb.2(0x80000000) 265 ; GFX908-NEXT: {{ $}} 266 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 267 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 268 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 269 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 270 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 271 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 272 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 273 ; GFX908-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 274 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 275 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 276 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 277 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 278 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 279 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 280 ; GFX908-NEXT: {{ $}} 281 ; GFX908-NEXT: bb.2: 282 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 283 ; GFX908-NEXT: {{ $}} 284 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 285 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 286 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 287 ; GFX908-NEXT: {{ $}} 288 ; GFX908-NEXT: bb.3: 289 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 290 ; GFX908-NEXT: SI_RETURN 291 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 292 ret void 293} 294 295; Waterfall for rsrc 296define void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 297 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset 298 ; GFX908: bb.0 (%ir-block.0): 299 ; GFX908-NEXT: successors: %bb.1(0x80000000) 300 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 301 ; GFX908-NEXT: {{ $}} 302 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 303 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 304 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 305 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 306 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 307 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 308 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 309 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 310 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 311 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 312 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 313 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 314 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 315 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 316 ; GFX908-NEXT: {{ $}} 317 ; GFX908-NEXT: bb.1: 318 ; GFX908-NEXT: successors: %bb.2(0x80000000) 319 ; GFX908-NEXT: {{ $}} 320 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 321 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, 
implicit $exec 322 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 323 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 324 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 325 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 326 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 327 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 328 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 329 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 330 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 331 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 332 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 333 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 334 ; GFX908-NEXT: {{ $}} 335 ; GFX908-NEXT: bb.2: 336 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 337 ; GFX908-NEXT: {{ $}} 338 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 339 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 340 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 341 ; GFX908-NEXT: {{ $}} 342 ; GFX908-NEXT: bb.3: 343 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 344 ; GFX908-NEXT: SI_RETURN 345 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 1) 346 ret void 347} 348 349; Waterfall for rsrc and soffset 350define void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 351 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset 352 ; GFX908: bb.0 (%ir-block.0): 353 ; GFX908-NEXT: successors: %bb.1(0x80000000) 354 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 355 ; GFX908-NEXT: {{ $}} 356 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 357 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 358 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 359 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 360 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 361 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 362 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 363 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 364 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 365 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = 
IMPLICIT_DEF 366 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 367 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 368 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 369 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 370 ; GFX908-NEXT: {{ $}} 371 ; GFX908-NEXT: bb.1: 372 ; GFX908-NEXT: successors: %bb.2(0x80000000) 373 ; GFX908-NEXT: {{ $}} 374 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 375 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 376 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 377 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 378 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 379 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 380 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 381 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 382 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 383 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 384 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 385 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 386 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 387 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 388 ; GFX908-NEXT: {{ $}} 389 ; GFX908-NEXT: bb.2: 390 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 391 ; GFX908-NEXT: {{ $}} 392 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 393 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 394 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 395 ; GFX908-NEXT: {{ $}} 396 ; GFX908-NEXT: bb.3: 397 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 398 ; GFX908-NEXT: SI_RETURN 399 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 400 ret void 401} 402 403; Waterfall for rsrc and soffset, copy for voffset 404define void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 405 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset 406 ; GFX908: bb.0 
(%ir-block.0): 407 ; GFX908-NEXT: successors: %bb.1(0x80000000) 408 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 409 ; GFX908-NEXT: {{ $}} 410 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 411 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 412 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 413 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 414 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 415 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 416 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 417 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 418 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 419 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 420 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 421 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 422 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 423 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 424 ; GFX908-NEXT: {{ $}} 425 ; GFX908-NEXT: bb.1: 426 ; GFX908-NEXT: successors: %bb.2(0x80000000) 427 ; GFX908-NEXT: {{ $}} 428 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 429 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 430 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 431 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 432 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 433 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 434 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 435 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 436 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 437 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 438 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 439 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 440 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 441 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 442 ; GFX908-NEXT: {{ $}} 443 ; GFX908-NEXT: bb.2: 444 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 445 ; GFX908-NEXT: {{ $}} 446 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), 
align 1, addrspace 8) 447 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 448 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 449 ; GFX908-NEXT: {{ $}} 450 ; GFX908-NEXT: bb.3: 451 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 452 ; GFX908-NEXT: SI_RETURN 453 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 454 ret void 455} 456 457; Natural mapping + glc 458define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 459 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 460 ; GFX908: bb.0 (%ir-block.0): 461 ; GFX908-NEXT: successors: %bb.1(0x80000000) 462 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 463 ; GFX908-NEXT: {{ $}} 464 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 465 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 466 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 467 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 468 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 469 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 470 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 471 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 472 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 473 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 474 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 475 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 476 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 477 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 478 ; GFX908-NEXT: {{ $}} 479 ; GFX908-NEXT: bb.1: 480 ; GFX908-NEXT: successors: %bb.2(0x80000000) 481 ; GFX908-NEXT: {{ $}} 482 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 483 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 484 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 485 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 486 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 487 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 488 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 489 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 490 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 491 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 492 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 493 ; GFX908-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 494 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 495 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 496 ; GFX908-NEXT: {{ $}} 497 ; GFX908-NEXT: bb.2: 498 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 499 ; GFX908-NEXT: {{ $}} 500 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 501 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 502 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 503 ; GFX908-NEXT: {{ $}} 504 ; GFX908-NEXT: bb.3: 505 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 506 ; GFX908-NEXT: SI_RETURN 507 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) 508 ret void 509} 510 511; Natural mapping + slc 512define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 513 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 514 ; GFX908: bb.0 (%ir-block.0): 515 ; GFX908-NEXT: successors: %bb.1(0x80000000) 516 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 517 ; GFX908-NEXT: {{ $}} 518 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 519 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 520 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 521 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 522 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 523 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 524 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 525 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 526 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 527 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 528 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 529 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 530 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 531 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 532 ; GFX908-NEXT: {{ $}} 533 ; GFX908-NEXT: bb.1: 534 ; GFX908-NEXT: successors: %bb.2(0x80000000) 535 ; GFX908-NEXT: {{ $}} 536 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 537 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 538 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 539 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 540 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 541 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 542 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 543 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 544 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 545 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 546 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 547 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 548 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 549 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 550 ; GFX908-NEXT: {{ $}} 551 ; GFX908-NEXT: bb.2: 552 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 553 ; GFX908-NEXT: {{ $}} 554 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 555 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 556 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 557 ; GFX908-NEXT: {{ $}} 558 ; GFX908-NEXT: bb.3: 559 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 560 ; GFX908-NEXT: SI_RETURN 561 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) 562 ret void 563} 564 565; Natural mapping + glc + slc 566define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 567 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 568 ; GFX908: bb.0 (%ir-block.0): 569 ; GFX908-NEXT: successors: %bb.1(0x80000000) 570 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 571 ; GFX908-NEXT: {{ $}} 572 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 573 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 574 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 575 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 576 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 577 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 578 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 579 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 580 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 581 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 582 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 583 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 584 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 585 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 586 ; GFX908-NEXT: {{ $}} 587 ; GFX908-NEXT: bb.1: 588 ; GFX908-NEXT: successors: %bb.2(0x80000000) 589 ; GFX908-NEXT: {{ $}} 590 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 
[[REG_SEQUENCE]].sub0, implicit $exec 591 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 592 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 593 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 594 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 595 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 596 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 597 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 598 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 599 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 600 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 601 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 602 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 603 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 604 ; GFX908-NEXT: {{ $}} 605 ; GFX908-NEXT: bb.2: 606 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 607 ; GFX908-NEXT: {{ $}} 608 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 609 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 610 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 611 ; GFX908-NEXT: {{ $}} 612 ; GFX908-NEXT: bb.3: 613 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 614 ; GFX908-NEXT: SI_RETURN 615 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) 616 ret void 617} 618 619; Natural mapping + dlc 620define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) { 621 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 622 ; GFX908: bb.0 (%ir-block.0): 623 ; GFX908-NEXT: successors: %bb.1(0x80000000) 624 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 625 ; GFX908-NEXT: {{ $}} 626 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 627 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 628 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 629 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 630 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 631 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 632 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 633 ; GFX908-NEXT: 
[[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 634 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 635 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 636 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 637 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 638 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 639 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 640 ; GFX908-NEXT: {{ $}} 641 ; GFX908-NEXT: bb.1: 642 ; GFX908-NEXT: successors: %bb.2(0x80000000) 643 ; GFX908-NEXT: {{ $}} 644 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 645 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 646 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 647 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 648 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 649 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 650 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 651 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 652 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 653 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 654 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 655 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 656 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 657 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 658 ; GFX908-NEXT: {{ $}} 659 ; GFX908-NEXT: bb.2: 660 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 661 ; GFX908-NEXT: {{ $}} 662 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 663 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 664 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 665 ; GFX908-NEXT: {{ $}} 666 ; GFX908-NEXT: bb.3: 667 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 668 ; GFX908-NEXT: SI_RETURN 669 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) 670 ret void 671} 672 673define void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(float %val, <4 x i32> %rsrc, i32 %soffset) { 674 ; GFX908-LABEL: name: 
raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0 675 ; GFX908: bb.0 (%ir-block.0): 676 ; GFX908-NEXT: successors: %bb.1(0x80000000) 677 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 678 ; GFX908-NEXT: {{ $}} 679 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 680 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 681 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 682 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 683 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 684 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 685 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 686 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 687 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 688 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 689 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 690 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 691 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 692 ; GFX908-NEXT: {{ $}} 693 ; GFX908-NEXT: bb.1: 694 ; GFX908-NEXT: successors: %bb.2(0x80000000) 695 ; GFX908-NEXT: {{ $}} 696 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 697 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 698 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 699 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 700 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 701 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 702 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 703 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 704 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 705 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 706 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 707 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 708 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 709 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 710 ; GFX908-NEXT: {{ $}} 711 ; GFX908-NEXT: bb.2: 712 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 713 ; GFX908-NEXT: {{ $}} 714 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY5]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable 
store (s32), align 1, addrspace 8) 715 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 716 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 717 ; GFX908-NEXT: {{ $}} 718 ; GFX908-NEXT: bb.3: 719 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 720 ; GFX908-NEXT: SI_RETURN 721 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0) 722 ret void 723} 724 725define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(float %val, <4 x i32> %rsrc, i32 %soffset) { 726 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095 727 ; GFX908: bb.0 (%ir-block.0): 728 ; GFX908-NEXT: successors: %bb.1(0x80000000) 729 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 730 ; GFX908-NEXT: {{ $}} 731 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 732 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 733 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 734 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 735 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 736 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 737 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 738 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 739 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 740 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 741 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 742 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 743 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 744 ; GFX908-NEXT: {{ $}} 745 ; GFX908-NEXT: bb.1: 746 ; GFX908-NEXT: successors: %bb.2(0x80000000) 747 ; GFX908-NEXT: {{ $}} 748 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 749 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 750 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 751 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 752 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 753 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 754 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 755 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 756 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 757 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 758 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 759 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], 
implicit $exec 760 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 761 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 762 ; GFX908-NEXT: {{ $}} 763 ; GFX908-NEXT: bb.2: 764 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 765 ; GFX908-NEXT: {{ $}} 766 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY5]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 767 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 768 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 769 ; GFX908-NEXT: {{ $}} 770 ; GFX908-NEXT: bb.3: 771 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 772 ; GFX908-NEXT: SI_RETURN 773 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) 774 ret void 775} 776 777define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(float %val, <4 x i32> %rsrc, i32 %soffset) { 778 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096 779 ; GFX908: bb.0 (%ir-block.0): 780 ; GFX908-NEXT: successors: %bb.1(0x80000000) 781 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 782 ; GFX908-NEXT: {{ $}} 783 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 784 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 785 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 786 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 787 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 788 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 789 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 790 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 791 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 792 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 793 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 794 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec 795 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 796 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 797 ; GFX908-NEXT: {{ $}} 798 ; GFX908-NEXT: bb.1: 799 ; GFX908-NEXT: successors: %bb.2(0x80000000) 800 ; GFX908-NEXT: {{ $}} 801 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 802 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 803 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 804 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 805 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 806 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 807 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 808 ; GFX908-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 809 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 810 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 811 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 812 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 813 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 814 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 815 ; GFX908-NEXT: {{ $}} 816 ; GFX908-NEXT: bb.2: 817 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 818 ; GFX908-NEXT: {{ $}} 819 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 820 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 821 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 822 ; GFX908-NEXT: {{ $}} 823 ; GFX908-NEXT: bb.3: 824 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 825 ; GFX908-NEXT: SI_RETURN 826 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) 827 ret void 828} 829 830define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 831 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16 832 ; GFX908: bb.0 (%ir-block.0): 833 ; GFX908-NEXT: successors: %bb.1(0x80000000) 834 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 835 ; GFX908-NEXT: {{ $}} 836 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 837 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 838 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 839 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 840 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 841 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 842 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 843 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 844 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 845 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 846 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 847 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 848 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 849 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 850 ; GFX908-NEXT: {{ $}} 851 ; GFX908-NEXT: bb.1: 852 ; GFX908-NEXT: successors: %bb.2(0x80000000) 853 ; GFX908-NEXT: {{ $}} 854 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 855 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 
[[REG_SEQUENCE]].sub1, implicit $exec 856 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 857 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 858 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 859 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 860 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 861 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 862 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 863 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 864 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 865 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 866 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 867 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 868 ; GFX908-NEXT: {{ $}} 869 ; GFX908-NEXT: bb.2: 870 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 871 ; GFX908-NEXT: {{ $}} 872 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 873 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 874 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 875 ; GFX908-NEXT: {{ $}} 876 ; GFX908-NEXT: bb.3: 877 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 878 ; GFX908-NEXT: SI_RETURN 879 %voffset = add i32 %voffset.base, 16 880 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 881 ret void 882} 883 884define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 885 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 886 ; GFX908: bb.0 (%ir-block.0): 887 ; GFX908-NEXT: successors: %bb.1(0x80000000) 888 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 889 ; GFX908-NEXT: {{ $}} 890 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 891 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 892 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 893 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 894 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 895 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 896 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 897 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 898 ; GFX908-NEXT: 
[[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 899 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 900 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 901 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 902 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 903 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 904 ; GFX908-NEXT: {{ $}} 905 ; GFX908-NEXT: bb.1: 906 ; GFX908-NEXT: successors: %bb.2(0x80000000) 907 ; GFX908-NEXT: {{ $}} 908 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 909 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 910 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 911 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 912 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 913 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 914 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 915 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 916 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 917 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 918 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 919 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 920 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 921 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 922 ; GFX908-NEXT: {{ $}} 923 ; GFX908-NEXT: bb.2: 924 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 925 ; GFX908-NEXT: {{ $}} 926 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 927 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 928 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 929 ; GFX908-NEXT: {{ $}} 930 ; GFX908-NEXT: bb.3: 931 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 932 ; GFX908-NEXT: SI_RETURN 933 %voffset = add i32 %voffset.base, 4095 934 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 935 ret void 936} 937 938define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 939 ; 
GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096 940 ; GFX908: bb.0 (%ir-block.0): 941 ; GFX908-NEXT: successors: %bb.1(0x80000000) 942 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 943 ; GFX908-NEXT: {{ $}} 944 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 945 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 946 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 947 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 948 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 949 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 950 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 951 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 952 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 953 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 954 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 955 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 956 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 957 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 958 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 959 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 960 ; GFX908-NEXT: {{ $}} 961 ; GFX908-NEXT: bb.1: 962 ; GFX908-NEXT: successors: %bb.2(0x80000000) 963 ; GFX908-NEXT: {{ $}} 964 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 965 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 966 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 967 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 968 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 969 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 970 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 971 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 972 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 973 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 974 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 975 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 976 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 977 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 978 ; GFX908-NEXT: {{ $}} 979 ; 
GFX908-NEXT: bb.2: 980 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 981 ; GFX908-NEXT: {{ $}} 982 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 983 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 984 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 985 ; GFX908-NEXT: {{ $}} 986 ; GFX908-NEXT: bb.3: 987 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 988 ; GFX908-NEXT: SI_RETURN 989 %voffset = add i32 %voffset.base, 4096 990 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 991 ret void 992} 993 994define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(float %val, <4 x i32> %rsrc, i32 %voffset) { 995 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095 996 ; GFX908: bb.0 (%ir-block.0): 997 ; GFX908-NEXT: successors: %bb.1(0x80000000) 998 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 999 ; GFX908-NEXT: {{ $}} 1000 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1001 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1002 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1003 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1004 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1005 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1006 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1007 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1008 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1009 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1010 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1011 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 1012 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1013 ; GFX908-NEXT: {{ $}} 1014 ; GFX908-NEXT: bb.1: 1015 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1016 ; GFX908-NEXT: {{ $}} 1017 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1018 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1019 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1020 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1021 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1022 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1023 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1024 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1025 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1026 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1027 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1028 ; GFX908-NEXT: {{ $}} 1029 ; GFX908-NEXT: bb.2: 1030 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1031 ; GFX908-NEXT: {{ $}} 1032 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1033 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1034 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1035 ; GFX908-NEXT: {{ $}} 1036 ; GFX908-NEXT: bb.3: 1037 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1038 ; GFX908-NEXT: SI_RETURN 1039 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) 1040 ret void 1041} 1042 1043define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(float %val, <4 x i32> %rsrc, i32 %voffset) { 1044 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096 1045 ; GFX908: bb.0 (%ir-block.0): 1046 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1047 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 1048 ; GFX908-NEXT: {{ $}} 1049 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1050 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1051 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1052 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1053 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1054 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1055 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1056 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1057 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1058 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1059 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 1060 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1061 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1062 ; GFX908-NEXT: {{ $}} 1063 ; GFX908-NEXT: bb.1: 1064 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1065 ; GFX908-NEXT: {{ $}} 1066 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1067 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1068 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1069 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1070 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1071 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1072 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1073 ; GFX908-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1074 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1075 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1076 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 1077 ; GFX908-NEXT: {{ $}} 1078 ; GFX908-NEXT: bb.2: 1079 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1080 ; GFX908-NEXT: {{ $}} 1081 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1082 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1083 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1084 ; GFX908-NEXT: {{ $}} 1085 ; GFX908-NEXT: bb.3: 1086 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1087 ; GFX908-NEXT: SI_RETURN 1088 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) 1089 ret void 1090} 1091 1092define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1093 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16 1094 ; GFX908: bb.0 (%ir-block.0): 1095 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1096 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 1097 ; GFX908-NEXT: {{ $}} 1098 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 1099 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1100 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1101 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1102 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1103 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1104 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1105 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1106 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1107 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1108 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1109 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1110 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 1111 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1112 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1113 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1114 ; GFX908-NEXT: {{ $}} 1115 ; GFX908-NEXT: bb.1: 1116 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1117 ; GFX908-NEXT: {{ $}} 1118 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1119 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1120 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], 
%subreg.sub1 1121 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1122 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1123 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1124 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1125 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1126 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1127 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1128 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1129 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1130 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1131 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1132 ; GFX908-NEXT: {{ $}} 1133 ; GFX908-NEXT: bb.2: 1134 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1135 ; GFX908-NEXT: {{ $}} 1136 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1137 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1138 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1139 ; GFX908-NEXT: {{ $}} 1140 ; GFX908-NEXT: bb.3: 1141 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1142 ; GFX908-NEXT: SI_RETURN 1143 %soffset = add i32 %soffset.base, 16 1144 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 1145 ret void 1146} 1147 1148define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1149 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095 1150 ; GFX908: bb.0 (%ir-block.0): 1151 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1152 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 1153 ; GFX908-NEXT: {{ $}} 1154 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 1155 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1156 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1157 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1158 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1159 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1160 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1161 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1162 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1163 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1164 ; GFX908-NEXT: 
[[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1165 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1166 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 1167 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1168 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1169 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1170 ; GFX908-NEXT: {{ $}} 1171 ; GFX908-NEXT: bb.1: 1172 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1173 ; GFX908-NEXT: {{ $}} 1174 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1175 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1176 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1177 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1178 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1179 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1180 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1181 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1182 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1183 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1184 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1185 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1186 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1187 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1188 ; GFX908-NEXT: {{ $}} 1189 ; GFX908-NEXT: bb.2: 1190 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1191 ; GFX908-NEXT: {{ $}} 1192 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1193 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1194 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1195 ; GFX908-NEXT: {{ $}} 1196 ; GFX908-NEXT: bb.3: 1197 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1198 ; GFX908-NEXT: SI_RETURN 1199 %soffset = add i32 %soffset.base, 4095 1200 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 1201 ret void 1202} 1203 1204define void 
@raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1205 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096 1206 ; GFX908: bb.0 (%ir-block.0): 1207 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1208 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 1209 ; GFX908-NEXT: {{ $}} 1210 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 1211 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1212 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1213 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1214 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1215 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1216 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1217 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1218 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1219 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1220 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1221 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1222 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1223 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1224 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1225 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1226 ; GFX908-NEXT: {{ $}} 1227 ; GFX908-NEXT: bb.1: 1228 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1229 ; GFX908-NEXT: {{ $}} 1230 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1231 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1232 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1233 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1234 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1235 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1236 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1237 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1238 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1239 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1240 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1241 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1242 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def 
$scc 1243 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1244 ; GFX908-NEXT: {{ $}} 1245 ; GFX908-NEXT: bb.2: 1246 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1247 ; GFX908-NEXT: {{ $}} 1248 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1249 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1250 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1251 ; GFX908-NEXT: {{ $}} 1252 ; GFX908-NEXT: bb.3: 1253 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1254 ; GFX908-NEXT: SI_RETURN 1255 %soffset = add i32 %soffset.base, 4096 1256 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 1257 ret void 1258} 1259 1260; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 1261define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) { 1262 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000 1263 ; GFX908: bb.0 (%ir-block.0): 1264 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1265 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 1266 ; GFX908-NEXT: {{ $}} 1267 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 1268 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1269 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1270 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1271 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1272 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1273 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1274 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1275 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1276 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1277 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1278 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1279 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 1280 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec 1281 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1282 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1283 ; GFX908-NEXT: {{ $}} 1284 ; GFX908-NEXT: bb.1: 1285 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1286 ; GFX908-NEXT: {{ $}} 1287 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1288 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1289 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1290 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1291 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1292 ; 
GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1293 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1294 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1295 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1296 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1297 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec 1298 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec 1299 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 1300 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1301 ; GFX908-NEXT: {{ $}} 1302 ; GFX908-NEXT: bb.2: 1303 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1304 ; GFX908-NEXT: {{ $}} 1305 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1306 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1307 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1308 ; GFX908-NEXT: {{ $}} 1309 ; GFX908-NEXT: bb.3: 1310 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1311 ; GFX908-NEXT: SI_RETURN 1312 %soffset = add i32 %soffset.base, 5000 1313 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 1314 ret void 1315} 1316 1317; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. 
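; In the checked MIR for the function below, that add is visible at the top of the entry
; block: only 4096 of the requested 5000 is added to %voffset.base via
; S_MOV_B32/V_ADD_U32_e64, and the remaining 904 is folded into the tbuffer instruction's
; immediate offset field. Rough shape of the expansion (schematic summary only, not extra
; check lines; block and value names are placeholders):
;
;   bb.0: %voff = V_ADD_U32_e64 %voffset.base, 4096          ; add stays outside the loop
;   bb.1: V_READFIRSTLANE_B32 / V_CMP_EQ / S_AND_SAVEEXEC    ; uniformize rsrc and soffset
;   bb.2: TBUFFER_STORE_FORMAT_X_OFFEN_exact ..., 904, 94, ...
;         $exec = S_XOR_B64_term ...; SI_WATERFALL_LOOP %bb.1
;   bb.3: $exec restored; SI_RETURN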
1318define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) { 1319 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000 1320 ; GFX908: bb.0 (%ir-block.0): 1321 ; GFX908-NEXT: successors: %bb.1(0x80000000) 1322 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 1323 ; GFX908-NEXT: {{ $}} 1324 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6 1325 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 1326 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 1327 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 1328 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 1329 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1330 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1331 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1332 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1333 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1334 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF 1335 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 1336 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 1337 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec 1338 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF 1339 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 1340 ; GFX908-NEXT: {{ $}} 1341 ; GFX908-NEXT: bb.1: 1342 ; GFX908-NEXT: successors: %bb.2(0x80000000) 1343 ; GFX908-NEXT: {{ $}} 1344 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec 1345 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec 1346 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 1347 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec 1348 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec 1349 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec 1350 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 1351 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec 1352 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc 1353 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 1354 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 1355 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec 1356 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc 
1357 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 1358 ; GFX908-NEXT: {{ $}} 1359 ; GFX908-NEXT: bb.2: 1360 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) 1361 ; GFX908-NEXT: {{ $}} 1362 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 1363 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 1364 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec 1365 ; GFX908-NEXT: {{ $}} 1366 ; GFX908-NEXT: bb.3: 1367 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] 1368 ; GFX908-NEXT: SI_RETURN 1369 %voffset = add i32 %voffset.base, 5000 1370 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) 1371 ret void 1372} 1373 1374declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 1375declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 1376declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 1377declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg) 1378
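; The intrinsic declarations above are shared by every test in this file. Purely as an
; illustrative sketch (hypothetical function name, no FileCheck assertions of its own; the
; dfmt/nfmt immediate 78 is reused from the first test), a caller that already provides the
; resource and soffset as uniform inreg (SGPR) arguments would look like the function below
; and should not need the readfirstlane/waterfall expansion checked above, since nothing has
; to be copied back to SGPRs:
define void @raw_tbuffer_store_f32__example_inreg_rsrc_soffset_sketch(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; Same raw.tbuffer.store.f32 intrinsic declared above; 78 is the format immediate, 0 the cache-policy bits.
  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
  ret void
}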