; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX12 %s

define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
  ; GFX10-LABEL: name: sample_d_1d_g16_a16
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX10-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX10-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX11-LABEL: name: sample_d_1d_g16_a16
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX11-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX11-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX11-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX11-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX12-LABEL: name: sample_d_1d_g16_a16
  ; GFX12: bb.1.main_body:
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX12-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX12-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX12-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX12-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX12-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX12-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX12-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX12-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX12-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX12-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX12-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX12-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX12-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX12-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX12-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
  ; GFX10-LABEL: name: sample_d_2d_g16_a16
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX10-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX10-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX10-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX10-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX10-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX10-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
  ; GFX10-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX11-LABEL: name: sample_d_2d_g16_a16
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX11-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX11-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX11-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX11-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX11-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX11-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
  ; GFX11-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX12-LABEL: name: sample_d_2d_g16_a16
  ; GFX12: bb.1.main_body:
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX12-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX12-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX12-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX12-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX12-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX12-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX12-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX12-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX12-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX12-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX12-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX12-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX12-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX12-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
  ; GFX12-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX12-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX12-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX12-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX12-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX12-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX12-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
  ; GFX10-LABEL: name: sample_d_3d_g16_a16
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX10-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX10-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX10-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX10-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX10-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX10-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
  ; GFX10-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT:   [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
  ; GFX10-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
  ; GFX10-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
  ; GFX10-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
  ; GFX10-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>)
  ; GFX10-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX11-LABEL: name: sample_d_3d_g16_a16
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX11-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX11-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX11-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX11-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX11-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX11-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX11-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
  ; GFX11-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX11-NEXT:   [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
  ; GFX11-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX11-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
  ; GFX11-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX11-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX11-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
  ; GFX11-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
  ; GFX11-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>)
  ; GFX11-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX12-LABEL: name: sample_d_3d_g16_a16
  ; GFX12: bb.1.main_body:
  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX12-NEXT: {{  $}}
  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX12-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX12-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX12-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX12-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX12-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX12-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX12-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX12-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX12-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX12-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX12-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
  ; GFX12-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX12-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
  ; GFX12-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX12-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
  ; GFX12-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX12-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
  ; GFX12-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX12-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
  ; GFX12-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX12-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
  ; GFX12-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX12-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
  ; GFX12-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX12-NEXT:   [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
  ; GFX12-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX12-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
  ; GFX12-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX12-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX12-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
  ; GFX12-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
  ; GFX12-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>)
  ; GFX12-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX12-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX12-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GFX12-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GFX12-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GFX12-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GFX12-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)