; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=GCN %s

define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret float %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x float> %tex
}

define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV2]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV3]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV3]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV4]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV5]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x float> %tex
}

define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT: $vgpr0 = COPY [[UV4]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV5]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[UV6]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[UV7]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32)
  ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32)
  ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }