; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll (revision 8f6a1a07cb85980013c70d5af6d28f5fcf75e732)
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=GCN %s

; Single-channel load (dmask = 0x1): legalizes to G_AMDGPU_INTRIN_IMAGE_LOAD producing one s32.
define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

; Two-channel load (dmask = 0x3): result is <2 x s32>, split with G_UNMERGE_VALUES for the return regs.
define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

; Three-channel load (dmask = 0x7): memory operand stays <3 x s32> but is given align 16.
define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

; Four-channel load (dmask = 0xf): full <4 x s32> result, unmerged into four return regs.
define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

; TFE variant of the single-channel load: result widens to <2 x s32> (data + TFE status word),
; and the status element is stored out via G_STORE.
define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { float, i32 } %res, 0
  %tfe = extractvalue { float, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret float %tex
}

; TFE two-channel load: result widens to <3 x s32> (two data words + TFE status word).
define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v2f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x float>, i32 } %res, 0
  %tfe = extractvalue { <2 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x float> %tex
}

; TFE three-channel load: result widens to <4 x s32>; memory operand stays <3 x s32> with align 16.
define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v3f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GCN-NEXT:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x float>, i32 } %res, 0
  %tfe = extractvalue { <3 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x float> %tex
}

; TFE four-channel load: result widens to <5 x s32> (four data words + TFE status word).
define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_tfe_v4f32
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
  ; GCN-NEXT:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x float>, i32 } %res, 0
  %tfe = extractvalue { <4 x float>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %tex
}

; dmask = 0 requests no channels: the load is elided entirely and replaced by G_IMPLICIT_DEF.
define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %tex
}

; <2 x float> return with only channel 0 enabled (dmask = 0x1): the load shrinks to a single
; s32 and the missing lane is filled with G_IMPLICIT_DEF.
define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

; <2 x float> return with dmask = 0: no load is emitted; the whole result is G_IMPLICIT_DEF.
define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v2f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %tex
}

; <3 x float> return with two channels enabled (dmask = 0x3): load shrinks to <2 x s32>,
; the third lane comes from G_IMPLICIT_DEF.
define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

; <3 x float> return with one channel enabled (dmask = 0x1): load shrinks to s32,
; lanes 1 and 2 reuse the same G_IMPLICIT_DEF.
define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

; <3 x float> return with dmask = 0: no load; whole result is a <3 x s32> G_IMPLICIT_DEF.
define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v3f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x float> %tex
}

; <4 x float> return with three channels enabled (dmask = 0x7): load shrinks to <3 x s32>
; (align 16); the fourth lane is G_IMPLICIT_DEF.
define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1110
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

; <4 x float> return with two channels enabled (dmask = 0x3): load shrinks to <2 x s32>,
; lanes 2 and 3 reuse the same G_IMPLICIT_DEF.
define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1100
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

; <4 x float> return with one channel enabled (dmask = 0x1): load shrinks to s32,
; lanes 1-3 all reuse the same G_IMPLICIT_DEF.
define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_1000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 8)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GCN-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[DEF]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[DEF]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[DEF]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

; <4 x float> return with dmask = 0: no load; whole result is a <4 x s32> G_IMPLICIT_DEF.
define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; GCN-LABEL: name: image_load_v4f32_dmask_0000
  ; GCN: bb.1 (%ir-block.0):
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; GCN-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %tex
}

; dmask = 0 but TFE is set: the load cannot be elided because the TFE status
; dword must still be produced. The checks show the dmask immediate widened to 1
; so exactly one data channel is read alongside TFE — the instruction returns
; <2 x s32> (data + tfe), the tfe dword (UV1) is stored to the undef pointer,
; and the data dword (UV) is returned.
467define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
468  ; GCN-LABEL: name: image_load_tfe_f32_dmask_0000
469  ; GCN: bb.1 (%ir-block.0):
470  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
471  ; GCN-NEXT: {{  $}}
472  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
473  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
474  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
475  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
476  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
477  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
478  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
479  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
480  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
481  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
482  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
483  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
484  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
485  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
486  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
487  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
488  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
489  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
490  %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
491  %tex = extractvalue { float, i32 } %res, 0
492  %tfe = extractvalue { float, i32 } %res, 1
493  store i32 %tfe, ptr addrspace(1) undef
494  ret float %tex
495}
496
; <2 x float> result, dmask = 1, TFE set: only one data channel is loaded, so
; the instruction returns <2 x s32> (one data dword + the tfe dword). The first
; unmerge extracts the tfe dword (UV1) for the store; the raw result is then
; unmerged a second time and both dwords feed the two return lanes — lane 1 of
; the returned <2 x float> was never requested and is not meaningful data.
497define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
498  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_1000
499  ; GCN: bb.1 (%ir-block.0):
500  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
501  ; GCN-NEXT: {{  $}}
502  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
503  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
504  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
505  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
506  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
507  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
508  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
509  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
510  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
511  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
512  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
513  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
514  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
515  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
516  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
517  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
518  ; GCN-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
519  ; GCN-NEXT:   $vgpr0 = COPY [[UV2]](s32)
520  ; GCN-NEXT:   $vgpr1 = COPY [[UV3]](s32)
521  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
522  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
523  %tex = extractvalue { <2 x float>, i32 } %res, 0
524  %tfe = extractvalue { <2 x float>, i32 } %res, 1
525  store i32 %tfe, ptr addrspace(1) undef
526  ret <2 x float> %tex
527}
528
; <2 x float> result, dmask = 0, TFE set: the dmask immediate is widened to 1
; (TFE requires at least one data channel), producing exactly the same MIR as
; the dmask_1000 variant above — a <2 x s32> load, tfe dword stored, and both
; raw dwords routed to the two return lanes.
529define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
530  ; GCN-LABEL: name: image_load_tfe_v2f32_dmask_0000
531  ; GCN: bb.1 (%ir-block.0):
532  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
533  ; GCN-NEXT: {{  $}}
534  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
535  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
536  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
537  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
538  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
539  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
540  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
541  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
542  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
543  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
544  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
545  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
546  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
547  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
548  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
549  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
550  ; GCN-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
551  ; GCN-NEXT:   $vgpr0 = COPY [[UV2]](s32)
552  ; GCN-NEXT:   $vgpr1 = COPY [[UV3]](s32)
553  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
554  %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
555  %tex = extractvalue { <2 x float>, i32 } %res, 0
556  %tfe = extractvalue { <2 x float>, i32 } %res, 1
557  store i32 %tfe, ptr addrspace(1) undef
558  ret <2 x float> %tex
559}
560
; <3 x float> result, dmask = 3 (two channels), TFE set: the load returns
; <3 x s32> (two data dwords + tfe) with a <2 x s32> memory footprint. The
; first unmerge feeds the tfe store (UV2); the raw result is unmerged again and
; all three dwords fill the return lanes — lane 2 is the unrequested slot.
561define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
562  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1100
563  ; GCN: bb.1 (%ir-block.0):
564  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
565  ; GCN-NEXT: {{  $}}
566  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
567  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
568  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
569  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
570  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
571  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
572  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
573  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
574  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
575  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
576  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
577  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
578  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
579  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
580  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
581  ; GCN-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
582  ; GCN-NEXT:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
583  ; GCN-NEXT:   $vgpr0 = COPY [[UV3]](s32)
584  ; GCN-NEXT:   $vgpr1 = COPY [[UV4]](s32)
585  ; GCN-NEXT:   $vgpr2 = COPY [[UV5]](s32)
586  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
587  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
588  %tex = extractvalue { <3 x float>, i32 } %res, 0
589  %tfe = extractvalue { <3 x float>, i32 } %res, 1
590  store i32 %tfe, ptr addrspace(1) undef
591  ret <3 x float> %tex
592}
593
; <3 x float> result, dmask = 1 (one channel), TFE set: the load returns
; <2 x s32> (data + tfe). The data dword (UV) goes to lane 0; the tfe dword
; (UV1) is stored; the two missing result lanes are padded with a shared
; G_IMPLICIT_DEF (DEF1) copied into $vgpr1 and $vgpr2.
594define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
595  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_1000
596  ; GCN: bb.1 (%ir-block.0):
597  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
598  ; GCN-NEXT: {{  $}}
599  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
600  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
601  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
602  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
603  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
604  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
605  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
606  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
607  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
608  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
609  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
610  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
611  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
612  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
613  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
614  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
615  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
616  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
617  ; GCN-NEXT:   $vgpr1 = COPY [[DEF1]](s32)
618  ; GCN-NEXT:   $vgpr2 = COPY [[DEF1]](s32)
619  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
620  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
621  %tex = extractvalue { <3 x float>, i32 } %res, 0
622  %tfe = extractvalue { <3 x float>, i32 } %res, 1
623  store i32 %tfe, ptr addrspace(1) undef
624  ret <3 x float> %tex
625}
626
; <3 x float> result, dmask = 0, TFE set: dmask is widened to 1 so the TFE
; dword has a data channel, yielding MIR identical to the dmask_1000 variant —
; a <2 x s32> load, tfe dword stored, and lanes 1-2 padded with G_IMPLICIT_DEF.
627define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
628  ; GCN-LABEL: name: image_load_tfe_v3f32_dmask_0000
629  ; GCN: bb.1 (%ir-block.0):
630  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
631  ; GCN-NEXT: {{  $}}
632  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
633  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
634  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
635  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
636  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
637  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
638  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
639  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
640  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
641  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
642  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
643  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
644  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
645  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
646  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
647  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
648  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
649  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
650  ; GCN-NEXT:   $vgpr1 = COPY [[DEF1]](s32)
651  ; GCN-NEXT:   $vgpr2 = COPY [[DEF1]](s32)
652  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
653  %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
654  %tex = extractvalue { <3 x float>, i32 } %res, 0
655  %tfe = extractvalue { <3 x float>, i32 } %res, 1
656  store i32 %tfe, ptr addrspace(1) undef
657  ret <3 x float> %tex
658}
659
; <4 x float> result, dmask = 7 (three channels), TFE set: the load returns
; <4 x s32> (three data dwords + tfe) with a <3 x s32>, align-16 memory
; footprint. The tfe dword (UV3) is stored; the raw result is unmerged again
; and all four dwords fill the return lanes — lane 3 is the unrequested slot.
660define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
661  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1110
662  ; GCN: bb.1 (%ir-block.0):
663  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
664  ; GCN-NEXT: {{  $}}
665  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
666  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
667  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
668  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
669  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
670  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
671  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
672  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
673  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
674  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
675  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
676  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
677  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
678  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 8)
679  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
680  ; GCN-NEXT:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
681  ; GCN-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
682  ; GCN-NEXT:   $vgpr0 = COPY [[UV4]](s32)
683  ; GCN-NEXT:   $vgpr1 = COPY [[UV5]](s32)
684  ; GCN-NEXT:   $vgpr2 = COPY [[UV6]](s32)
685  ; GCN-NEXT:   $vgpr3 = COPY [[UV7]](s32)
686  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
687  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
688  %tex = extractvalue { <4 x float>, i32 } %res, 0
689  %tfe = extractvalue { <4 x float>, i32 } %res, 1
690  store i32 %tfe, ptr addrspace(1) undef
691  ret <4 x float> %tex
692}
693
; <4 x float> result, dmask = 3 (two channels), TFE set: the load returns
; <3 x s32> (two data dwords + tfe). The two data dwords go to lanes 0-1, the
; tfe dword (UV2) is stored, and lanes 2-3 are padded with a shared
; G_IMPLICIT_DEF (DEF1).
694define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
695  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1100
696  ; GCN: bb.1 (%ir-block.0):
697  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
698  ; GCN-NEXT: {{  $}}
699  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
700  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
701  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
702  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
703  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
704  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
705  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
706  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
707  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
708  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
709  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
710  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
711  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
712  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 8)
713  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
714  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
715  ; GCN-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
716  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
717  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
718  ; GCN-NEXT:   $vgpr2 = COPY [[DEF1]](s32)
719  ; GCN-NEXT:   $vgpr3 = COPY [[DEF1]](s32)
720  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
721  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
722  %tex = extractvalue { <4 x float>, i32 } %res, 0
723  %tfe = extractvalue { <4 x float>, i32 } %res, 1
724  store i32 %tfe, ptr addrspace(1) undef
725  ret <4 x float> %tex
726}
727
; <4 x float> result, dmask = 1 (one channel), TFE set: the load returns
; <2 x s32> (data + tfe). The data dword goes to lane 0, the tfe dword (UV1)
; is stored, and lanes 1-3 are padded with a shared G_IMPLICIT_DEF (DEF1).
728define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
729  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_1000
730  ; GCN: bb.1 (%ir-block.0):
731  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
732  ; GCN-NEXT: {{  $}}
733  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
734  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
735  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
736  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
737  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
738  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
739  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
740  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
741  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
742  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
743  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
744  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
745  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
746  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
747  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
748  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
749  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
750  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
751  ; GCN-NEXT:   $vgpr1 = COPY [[DEF1]](s32)
752  ; GCN-NEXT:   $vgpr2 = COPY [[DEF1]](s32)
753  ; GCN-NEXT:   $vgpr3 = COPY [[DEF1]](s32)
754  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
755  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
756  %tex = extractvalue { <4 x float>, i32 } %res, 0
757  %tfe = extractvalue { <4 x float>, i32 } %res, 1
758  store i32 %tfe, ptr addrspace(1) undef
759  ret <4 x float> %tex
760}
761
; <4 x float> result, dmask = 0, TFE set: dmask is widened to 1 so the TFE
; dword has a data channel, producing MIR identical to the dmask_1000 variant —
; a <2 x s32> load, tfe dword stored, and lanes 1-3 padded with G_IMPLICIT_DEF.
762define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
763  ; GCN-LABEL: name: image_load_tfe_v4f32_dmask_0000
764  ; GCN: bb.1 (%ir-block.0):
765  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
766  ; GCN-NEXT: {{  $}}
767  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
768  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
769  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
770  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
771  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
772  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
773  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
774  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
775  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
776  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
777  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
778  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
779  ; GCN-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
780  ; GCN-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 8)
781  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
782  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
783  ; GCN-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
784  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
785  ; GCN-NEXT:   $vgpr1 = COPY [[DEF1]](s32)
786  ; GCN-NEXT:   $vgpr2 = COPY [[DEF1]](s32)
787  ; GCN-NEXT:   $vgpr3 = COPY [[DEF1]](s32)
788  ; GCN-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
789  %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
790  %tex = extractvalue { <4 x float>, i32 } %res, 0
791  %tfe = extractvalue { <4 x float>, i32 } %res, 1
792  store i32 %tfe, ptr addrspace(1) undef
793  ret <4 x float> %tex
794}
795
796declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
797declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
798declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
799declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
800declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
801declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
802declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
803declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
804
805attributes #0 = { nounwind readonly }
806