; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
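
; gfx810 selects the packed D16 buffer loads (PACKED prefix). Tonga has
; unpacked D16 VMEM, so it selects the *_gfx80 variants, which return each
; 16-bit component in the low half of its own 32-bit VGPR (UNPACKED prefix).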

; Natural mapping
define amdgpu_ps half @raw_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; PACKED-LABEL: name: raw_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
  ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
  ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; UNPACKED-LABEL: name: raw_ptr_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
  ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
  ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}
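
; On the unpacked path the two half components come back in separate VGPRs and
; must be repacked into a single VGPR with AND/SHL/OR before the return.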
define amdgpu_ps <2 x half> @raw_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; PACKED-LABEL: name: raw_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
  ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; UNPACKED-LABEL: name: raw_ptr_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
  ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
  ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec
  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec
  ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec
  ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
  ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
  ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %val
}

; FIXME
; define amdgpu_ps <3 x half> @raw_ptr_buffer_load_format_v3f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
;   %val = call <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
;   ret <3 x half> %val
; }
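
; The packed v4f16 result fits in two VGPRs (vreg_64); the unpacked result
; spans four VGPRs (vreg_128) and needs two rounds of repacking.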
define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; PACKED-LABEL: name: raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
  ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
  ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]]
  ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]]
  ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; UNPACKED-LABEL: name: raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
  ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
  ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3
  ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec
  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec
  ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec
  ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec
  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec
  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec
  ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
  ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
  ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
  ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x half> %val
}

; Waterfall for rsrc and soffset, copy for voffset
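; The VGPR rsrc and soffset are made uniform with V_READFIRSTLANE_B32;
; V_CMP_EQ plus S_AND_SAVEEXEC restrict exec to the lanes whose values match,
; and SI_WATERFALL_LOOP repeats the load until every lane has been serviced.
; The SGPR voffset only needs a copy into a VGPR.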
define amdgpu_ps half @raw_ptr_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset(ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; PACKED-LABEL: name: raw_ptr_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT: successors: %bb.2(0x80000000)
  ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: bb.2:
  ; PACKED-NEXT: successors: %bb.3(0x80000000)
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; PACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
  ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: bb.3:
  ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
  ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: bb.4:
  ; PACKED-NEXT: successors: %bb.5(0x80000000)
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: bb.5:
  ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
  ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; UNPACKED-LABEL: name: raw_ptr_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT: successors: %bb.2(0x80000000)
  ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: bb.2:
  ; UNPACKED-NEXT: successors: %bb.3(0x80000000)
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
  ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: bb.3:
  ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from %ir.rsrc, align 1, addrspace 8)
  ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: bb.4:
  ; UNPACKED-NEXT: successors: %bb.5(0x80000000)
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: bb.5:
  ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
  ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}
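
; An add of 4095 to the voffset folds into the MUBUF immediate offset field
; (4095 is the largest value that fits) instead of materializing a separate
; V_ADD.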
define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add_4095(ptr addrspace(8) inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; PACKED-LABEL: name: raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add_4095
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; PACKED-NEXT: {{ $}}
  ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
  ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
  ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]]
  ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]]
  ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; UNPACKED-LABEL: name: raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add_4095
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; UNPACKED-NEXT: {{ $}}
  ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from %ir.rsrc, align 1, addrspace 8)
  ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
  ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
  ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
  ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3
  ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec
  ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec
  ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec
  ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
  ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec
  ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec
  ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec
  ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
  ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
  ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
  ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %voffset = add i32 %voffset.base, 4095
  %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x half> %val
}

declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) #0

attributes #0 = { nounwind readonly }