; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s
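; tonga lacks packed D16 VMEM instructions, so each half component of a D16
; load is returned in its own 32-bit register and repacked with shift/or
; (UNPACKED); gfx810 returns two half components per 32-bit register (PACKED).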

define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16)
  ; PACKED-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; PACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
  ; PACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED-NEXT:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

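; With TFE enabled, the load returns one extra 32-bit status dword; these tests
; store it to an undef global pointer to keep it live.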
define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_tfe_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { half, i32 } %res, 0
  %tfe = extractvalue { half, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_tfe_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x half>, i32 } %res, 0
  %tfe = extractvalue { <2 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_tfe_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
  ; PACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <3 x half>, i32 } %res, 0
  %tfe = extractvalue { <3 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED-NEXT:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_tfe_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
  ; PACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x half> %tex
}

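; With a null or partial dmask the load is narrowed (or elided entirely), and
; the missing components are padded out with implicit defs or zero constants.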
483define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
484  ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
485  ; UNPACKED: bb.1 (%ir-block.0):
486  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
487  ; UNPACKED-NEXT: {{  $}}
488  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
489  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
490  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
491  ; UNPACKED-NEXT:   $vgpr0 = COPY [[DEF]](s32)
492  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
493  ;
494  ; PACKED-LABEL: name: image_load_f16_dmask_0000
495  ; PACKED: bb.1 (%ir-block.0):
496  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
497  ; PACKED-NEXT: {{  $}}
498  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
499  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
500  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
501  ; PACKED-NEXT:   $vgpr0 = COPY [[DEF]](s32)
502  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
503  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
504  ret half %tex
505}
506
507define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
508  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
509  ; UNPACKED: bb.1 (%ir-block.0):
510  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
511  ; UNPACKED-NEXT: {{  $}}
512  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
513  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
514  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
515  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
516  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
517  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
518  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
519  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
520  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
521  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
522  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
523  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
524  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
525  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
526  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
527  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
528  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
529  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
530  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
531  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
532  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
533  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
534  ;
535  ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
536  ; PACKED: bb.1 (%ir-block.0):
537  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
538  ; PACKED-NEXT: {{  $}}
539  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
540  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
541  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
542  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
543  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
544  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
545  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
546  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
547  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
548  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
549  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
550  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
551  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
552  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
553  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
554  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
555  ret <2 x half> %tex
556}
557
558define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
559  ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
560  ; UNPACKED: bb.1 (%ir-block.0):
561  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
562  ; UNPACKED-NEXT: {{  $}}
563  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
564  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
565  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
566  ; UNPACKED-NEXT:   $vgpr0 = COPY [[DEF]](<2 x s16>)
567  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
568  ;
569  ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
570  ; PACKED: bb.1 (%ir-block.0):
571  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
572  ; PACKED-NEXT: {{  $}}
573  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
574  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
575  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
576  ; PACKED-NEXT:   $vgpr0 = COPY [[DEF]](<2 x s16>)
577  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
578  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
579  ret <2 x half> %tex
580}
581
582define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
583  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
584  ; UNPACKED: bb.1 (%ir-block.0):
585  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
586  ; UNPACKED-NEXT: {{  $}}
587  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
588  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
589  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
590  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
591  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
592  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
593  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
594  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
595  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
596  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
597  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
598  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
599  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
600  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
601  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
602  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
603  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
604  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
605  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
606  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
607  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
608  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
609  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
610  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
611  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
612  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
613  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
614  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
615  ;
616  ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
617  ; PACKED: bb.1 (%ir-block.0):
618  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
619  ; PACKED-NEXT: {{  $}}
620  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
621  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
622  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
623  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
624  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
625  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
626  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
627  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
628  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
629  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
630  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
631  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
632  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
633  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
634  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
635  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
636  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
637  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
638  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
639  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST]](<2 x s16>)
640  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
641  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
642  ret <3 x half> %tex
643}
644
645define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
646  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
647  ; UNPACKED: bb.1 (%ir-block.0):
648  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
649  ; UNPACKED-NEXT: {{  $}}
650  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
651  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
652  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
653  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
654  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
655  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
656  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
657  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
658  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
659  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
660  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
661  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
662  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
663  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
664  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
665  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
666  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
667  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
668  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
669  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
670  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
671  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
672  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
673  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
674  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
675  ;
676  ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
677  ; PACKED: bb.1 (%ir-block.0):
678  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
679  ; PACKED-NEXT: {{  $}}
680  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
681  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
682  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
683  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
684  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
685  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
686  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
687  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
688  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
689  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
690  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
691  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
692  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
693  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
694  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
695  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
696  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
697  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
698  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
699  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST]](<2 x s16>)
700  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
701  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
702  ret <3 x half> %tex
703}
704
705define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
706  ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
707  ; UNPACKED: bb.1 (%ir-block.0):
708  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
709  ; UNPACKED-NEXT: {{  $}}
710  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
711  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
712  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
713  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
714  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
715  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
716  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
717  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
718  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
719  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
720  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
721  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
722  ; UNPACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
723  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
724  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
725  ;
726  ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
727  ; PACKED: bb.1 (%ir-block.0):
728  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
729  ; PACKED-NEXT: {{  $}}
730  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
731  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
732  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
733  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
734  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
735  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
736  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
737  ; PACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
738  ; PACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
739  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
740  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
741  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
742  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
743  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
744  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
745  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
746  ret <3 x half> %tex
747}
748
define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v4f16_dmask_1100
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[DEF]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v4f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[DEF]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_v4f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

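; TFE variants: the texfailctrl operand (the i32 1 after %rsrc) appends an
; i32 status dword to the result, which these tests store to an undef global
; pointer. Even with dmask=0 the legalizer keeps one data channel loaded so
; the status dword has a defined position.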
define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { half, i32 } %res, 0
  %tfe = extractvalue { half, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x half>, i32 } %res, 0
  %tfe = extractvalue { <2 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x half> %tex
}

define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <2 x half>, i32 } %res, 0
  %tfe = extractvalue { <2 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x half> %tex
}

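; <3 x half> with TFE: the status dword still rides along after the data, so
; the wide result is unmerged, the trailing s32 is stored out, and the data
; halves are repacked for the two return registers.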
1127define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1128  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1129  ; UNPACKED: bb.1 (%ir-block.0):
1130  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1131  ; UNPACKED-NEXT: {{  $}}
1132  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1133  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1134  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1135  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1136  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1137  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1138  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1139  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1140  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1141  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1142  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1143  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1144  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1145  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1146  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1147  ; UNPACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1148  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1149  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1150  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1151  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1152  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1153  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1154  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1155  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1156  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1157  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1158  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1159  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1160  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1161  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1162  ;
1163  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1164  ; PACKED: bb.1 (%ir-block.0):
1165  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1166  ; PACKED-NEXT: {{  $}}
1167  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1168  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1169  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1170  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1171  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1172  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1173  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1174  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1175  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1176  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1177  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1178  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1179  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1180  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1181  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1182  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1183  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1184  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1185  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1186  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1187  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1188  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1189  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1190  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1191  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1192  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1193  %tex = extractvalue { <3 x half>, i32 } %res, 0
1194  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1195  store i32 %tfe, ptr addrspace(1) undef
1196  ret <3 x half> %tex
1197}
1198
1199define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1200  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1201  ; UNPACKED: bb.1 (%ir-block.0):
1202  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1203  ; UNPACKED-NEXT: {{  $}}
1204  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1205  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1206  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1207  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1208  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1209  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1210  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1211  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1212  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1213  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1214  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1215  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1216  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1217  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1218  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1219  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1220  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1221  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1222  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1223  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1224  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1225  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1226  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1227  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1228  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1229  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1230  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1231  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1232  ;
1233  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1234  ; PACKED: bb.1 (%ir-block.0):
1235  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1236  ; PACKED-NEXT: {{  $}}
1237  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1238  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1239  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1240  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1241  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1242  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1243  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1244  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1245  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1246  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1247  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1248  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1249  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1250  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1251  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1252  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1253  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1254  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1255  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1256  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1257  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1258  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1259  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1260  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1261  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1262  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1263  %tex = extractvalue { <3 x half>, i32 } %res, 0
1264  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1265  store i32 %tfe, ptr addrspace(1) undef
1266  ret <3 x half> %tex
1267}
1268
1269define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1270  ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1271  ; UNPACKED: bb.1 (%ir-block.0):
1272  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1273  ; UNPACKED-NEXT: {{  $}}
1274  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1275  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1276  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1277  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1278  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1279  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1280  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1281  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1282  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1283  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1284  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1285  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1286  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1287  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1288  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1289  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1290  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1291  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1292  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1293  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1294  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1295  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1296  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1297  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1298  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1299  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1300  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1301  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1302  ;
1303  ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1304  ; PACKED: bb.1 (%ir-block.0):
1305  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1306  ; PACKED-NEXT: {{  $}}
1307  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1308  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1309  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1310  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1311  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1312  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1313  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1314  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1315  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1316  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1317  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1318  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1319  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1320  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1321  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1322  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1323  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1324  ; PACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1325  ; PACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1326  ; PACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1327  ; PACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1328  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1329  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1330  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1331  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1332  %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1333  %tex = extractvalue { <3 x half>, i32 } %res, 0
1334  %tfe = extractvalue { <3 x half>, i32 } %res, 1
1335  store i32 %tfe, ptr addrspace(1) undef
1336  ret <3 x half> %tex
1337}
1338
1339define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1340  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1341  ; UNPACKED: bb.1 (%ir-block.0):
1342  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1343  ; UNPACKED-NEXT: {{  $}}
1344  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1345  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1346  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1347  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1348  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1349  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1350  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1351  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1352  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1353  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1354  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1355  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1356  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1357  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
1358  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
1359  ; UNPACKED-NEXT:   G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1360  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1361  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1362  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1363  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1364  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1365  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1366  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1367  ; UNPACKED-NEXT:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
1368  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1369  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1370  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1371  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1372  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1373  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1374  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1375  ;
1376  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1377  ; PACKED: bb.1 (%ir-block.0):
1378  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1379  ; PACKED-NEXT: {{  $}}
1380  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1381  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1382  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1383  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1384  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1385  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1386  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1387  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1388  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1389  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1390  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1391  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1392  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1393  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
1394  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1395  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1396  ; PACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
1397  ; PACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1398  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1399  ; PACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1400  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1401  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1402  %tex = extractvalue { <4 x half>, i32 } %res, 0
1403  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1404  store i32 %tfe, ptr addrspace(1) undef
1405  ret <4 x half> %tex
1406}
1407
1408define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1409  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1410  ; UNPACKED: bb.1 (%ir-block.0):
1411  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1412  ; UNPACKED-NEXT: {{  $}}
1413  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1414  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1415  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1416  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1417  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1418  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1419  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1420  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1421  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1422  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1423  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1424  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1425  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1426  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1427  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1428  ; UNPACKED-NEXT:   G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1429  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1430  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1431  ; UNPACKED-NEXT:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1432  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1433  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1434  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1435  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1436  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1437  ; UNPACKED-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1438  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1439  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1440  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1441  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1442  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1443  ;
1444  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1445  ; PACKED: bb.1 (%ir-block.0):
1446  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1447  ; PACKED-NEXT: {{  $}}
1448  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1449  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1450  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1451  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1452  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1453  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1454  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1455  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1456  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1457  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1458  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1459  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1460  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1461  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1462  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1463  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1464  ; PACKED-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1465  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1466  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1467  ; PACKED-NEXT:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
1468  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1469  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1470  %tex = extractvalue { <4 x half>, i32 } %res, 0
1471  %tfe = extractvalue { <4 x half>, i32 } %res, 1
1472  store i32 %tfe, ptr addrspace(1) undef
1473  ret <4 x half> %tex
1474}
1475
define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED-NEXT: {{  $}}
  ; UNPACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; UNPACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; UNPACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; UNPACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; UNPACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; UNPACKED-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
  ; UNPACKED-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
  ; UNPACKED-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; UNPACKED-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; UNPACKED-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
  ; UNPACKED-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; UNPACKED-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
  ; UNPACKED-NEXT:   [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; UNPACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; UNPACKED-NEXT:   $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
  ; UNPACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED-NEXT: {{  $}}
  ; PACKED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; PACKED-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
  ; PACKED-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
  ; PACKED-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
  ; PACKED-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
  ; PACKED-NEXT:   G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; PACKED-NEXT:   $vgpr0 = COPY [[BITCAST]](<2 x s16>)
  ; PACKED-NEXT:   $vgpr1 = COPY [[DEF1]](<2 x s16>)
  ; PACKED-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %tex = extractvalue { <4 x half>, i32 } %res, 0
  %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x half> %tex
}

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }