xref: /llvm-project/llvm/test/CodeGen/AMDGPU/immv216.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
1; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
3; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
5; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
6; FIXME: Merge into imm.ll
7
8; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
9; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
10; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: a single store of <2 x i16> with both lanes set to i16 -32768
; (0x8000 per lane) through the addrspace(1) pointer %out.
11define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(ptr addrspace(1) %out) #0 {
12  store <2 x i16> <i16 -32768, i16 -32768>, ptr addrspace(1) %out
13  ret void
14}
15
16; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
17; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
18; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <0.0, 0.0> through the
; addrspace(1) pointer %out; nothing is loaded or computed.
19define amdgpu_kernel void @store_inline_imm_0.0_v2f16(ptr addrspace(1) %out) #0 {
20  store <2 x half> <half 0.0, half 0.0>, ptr addrspace(1) %out
21  ret void
22}
23
24; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
25; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
26; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <-0.0, -0.0> through %out.
; The checks above expect the same 0x80008000 word as the v2i16 -32768 test.
27define amdgpu_kernel void @store_imm_neg_0.0_v2f16(ptr addrspace(1) %out) #0 {
28  store <2 x half> <half -0.0, half -0.0>, ptr addrspace(1) %out
29  ret void
30}
31
32; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
33; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
34; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <0.5, 0.5> through the
; addrspace(1) pointer %out.
35define amdgpu_kernel void @store_inline_imm_0.5_v2f16(ptr addrspace(1) %out) #0 {
36  store <2 x half> <half 0.5, half 0.5>, ptr addrspace(1) %out
37  ret void
38}
39
40; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
41; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
42; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <-0.5, -0.5> through the
; addrspace(1) pointer %out.
43define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(ptr addrspace(1) %out) #0 {
44  store <2 x half> <half -0.5, half -0.5>, ptr addrspace(1) %out
45  ret void
46}
47
48; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
49; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
50; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <1.0, 1.0> through the
; addrspace(1) pointer %out.
51define amdgpu_kernel void @store_inline_imm_1.0_v2f16(ptr addrspace(1) %out) #0 {
52  store <2 x half> <half 1.0, half 1.0>, ptr addrspace(1) %out
53  ret void
54}
55
56; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
57; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
58; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <-1.0, -1.0> through the
; addrspace(1) pointer %out.
59define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(ptr addrspace(1) %out) #0 {
60  store <2 x half> <half -1.0, half -1.0>, ptr addrspace(1) %out
61  ret void
62}
63
64; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
65; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
66; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <2.0, 2.0> through the
; addrspace(1) pointer %out.
67define amdgpu_kernel void @store_inline_imm_2.0_v2f16(ptr addrspace(1) %out) #0 {
68  store <2 x half> <half 2.0, half 2.0>, ptr addrspace(1) %out
69  ret void
70}
71
72; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
73; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
74; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <-2.0, -2.0> through the
; addrspace(1) pointer %out.
75define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(ptr addrspace(1) %out) #0 {
76  store <2 x half> <half -2.0, half -2.0>, ptr addrspace(1) %out
77  ret void
78}
79
80; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
81; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
82; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <4.0, 4.0> through the
; addrspace(1) pointer %out.
83define amdgpu_kernel void @store_inline_imm_4.0_v2f16(ptr addrspace(1) %out) #0 {
84  store <2 x half> <half 4.0, half 4.0>, ptr addrspace(1) %out
85  ret void
86}
87
88; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
89; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
90; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <-4.0, -4.0> through the
; addrspace(1) pointer %out.
91define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(ptr addrspace(1) %out) #0 {
92  store <2 x half> <half -4.0, half -4.0>, ptr addrspace(1) %out
93  ret void
94}
95
96; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
97; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
98; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores a <2 x half> whose lanes are both given by the raw bit
; pattern 0xH3118 (the value the test name refers to as inv_2pi) through %out.
99define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
100  store <2 x half> <half 0xH3118, half 0xH3118>, ptr addrspace(1) %out
101  ret void
102}
103
104; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
105; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
106; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores a <2 x half> whose lanes are both given by the raw bit
; pattern 0xHB118 (0xH3118 with the sign bit set) through %out.
107define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
108  store <2 x half> <half 0xHB118, half 0xHB118>, ptr addrspace(1) %out
109  ret void
110}
111
112; GCN-LABEL: {{^}}store_literal_imm_v2f16:
113; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00
114; GCN: buffer_store_{{dword|b32}} [[REG]]
; Kernel body: stores the constant <2 x half> <4096.0, 4096.0> through %out.
; The checks above expect it as the 32-bit word 0x6c006c00.
115define amdgpu_kernel void @store_literal_imm_v2f16(ptr addrspace(1) %out) #0 {
116  store <2 x half> <half 4096.0, half 4096.0>, ptr addrspace(1) %out
117  ret void
118}
119
120; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
121; GFX9: s_load_dword [[VAL:s[0-9]+]]
122; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0 ; encoding
123; GFX9: buffer_store_dword [[REG]]
124
125; FIXME: Shouldn't need right shift and SDWA, also extra copy
126; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
127; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
128; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
129; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
130
131; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
132; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0
133; VI: v_or_b32
134; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <0.0, 0.0>; the sum is stored through %out.
; NOTE(review): the checks above cover only the gfx9 and fiji prefixes for this
; function, while the sibling add tests also check the gfx10 prefix - confirm
; whether the omission is intentional.
135define amdgpu_kernel void @add_inline_imm_0.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
136  %y = fadd <2 x half> %x, <half 0.0, half 0.0>
137  store <2 x half> %y, ptr addrspace(1) %out
138  ret void
139}
140
141; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
142; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
143; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe0,0x01,0x08]
144; GFX10: buffer_store_{{dword|b32}} [[REG]]
145
146; GFX9: s_load_dword [[VAL:s[0-9]+]]
147; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe0,0x01,0x08]
148; GFX9: buffer_store_dword [[REG]]
149
150; FIXME: Shouldn't need right shift and SDWA, also extra copy
151; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
152; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
153; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
154; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
155
156; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
157; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0.5
158; VI: v_or_b32
159; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <0.5, 0.5>; the sum is stored through %out.
160define amdgpu_kernel void @add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
161  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
162  store <2 x half> %y, ptr addrspace(1) %out
163  ret void
164}
165
166; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
167; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
168; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe2,0x01,0x08]
169; GFX10: buffer_store_{{dword|b32}} [[REG]]
170
171; GFX9: s_load_dword [[VAL:s[0-9]+]]
172; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe2,0x01,0x08]
173; GFX9: buffer_store_dword [[REG]]
174
175; FIXME: Shouldn't need right shift and SDWA, also extra copy
176; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
177; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
178; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
179; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
180
181; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
182; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -0.5
183; VI: v_or_b32
184; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <-0.5, -0.5>; the sum is stored through %out.
185define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
186  %y = fadd <2 x half> %x, <half -0.5, half -0.5>
187  store <2 x half> %y, ptr addrspace(1) %out
188  ret void
189}
190
191; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
192; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
193; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding:
194; GFX10: buffer_store_{{dword|b32}} [[REG]]
195
196; GFX9: s_load_dword [[VAL:s[0-9]+]]
197; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding
198; GFX9: buffer_store_dword [[REG]]
199
200; FIXME: Shouldn't need right shift and SDWA, also extra copy
201; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
202; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
203; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
204; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
205
206; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
207; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1.0
208; VI: v_or_b32
209; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <1.0, 1.0>; the sum is stored through %out.
210define amdgpu_kernel void @add_inline_imm_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
211  %y = fadd <2 x half> %x, <half 1.0, half 1.0>
212  store <2 x half> %y, ptr addrspace(1) %out
213  ret void
214}
215
216; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
217; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
218; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
219; GFX10: buffer_store_{{dword|b32}} [[REG]]
220
221; GFX9: s_load_dword [[VAL:s[0-9]+]]
222; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
223; GFX9: buffer_store_dword [[REG]]
224
225
226; FIXME: Shouldn't need right shift and SDWA, also extra copy
227; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
228; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
229; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
230; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
231
232; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
233; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -1.0
234; VI: v_or_b32
235; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <-1.0, -1.0>; the sum is stored through %out.
; The fiji pattern variable holding 0xbc00 is named CONSTM1 to follow the
; CONSTM05 / CONSTM2 / CONSTM4 naming used by the other negative-constant tests.
236define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
237  %y = fadd <2 x half> %x, <half -1.0, half -1.0>
238  store <2 x half> %y, ptr addrspace(1) %out
239  ret void
240}
241
242; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
243; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
244; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
245; GFX10: buffer_store_{{dword|b32}} [[REG]]
246
247; GFX9: s_load_dword [[VAL:s[0-9]+]]
248; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
249; GFX9: buffer_store_dword [[REG]]
250
251; FIXME: Shouldn't need right shift and SDWA, also extra copy
252; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
253; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
254; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
255; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
256
257; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
258; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2.0
259; VI: v_or_b32
260; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <2.0, 2.0>; the sum is stored through %out.
261define amdgpu_kernel void @add_inline_imm_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
262  %y = fadd <2 x half> %x, <half 2.0, half 2.0>
263  store <2 x half> %y, ptr addrspace(1) %out
264  ret void
265}
266
267; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
268; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
269; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
270; GFX10: buffer_store_{{dword|b32}} [[REG]]
271
272; GFX9: s_load_dword [[VAL:s[0-9]+]]
273; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
274; GFX9: buffer_store_dword [[REG]]
275
276; FIXME: Shouldn't need right shift and SDWA, also extra copy
277; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
278; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
279; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
280; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
281
282; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
283; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -2.0
284; VI: v_or_b32
285; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <-2.0, -2.0>; the sum is stored through %out.
286define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
287  %y = fadd <2 x half> %x, <half -2.0, half -2.0>
288  store <2 x half> %y, ptr addrspace(1) %out
289  ret void
290}
291
292; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
293; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
294; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
295; GFX10: buffer_store_{{dword|b32}} [[REG]]
296
297; GFX9: s_load_dword [[VAL:s[0-9]+]]
298; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
299; GFX9: buffer_store_dword [[REG]]
300
301; FIXME: Shouldn't need right shift and SDWA, also extra copy
302; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
303; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
304; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
305; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
306
307; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
308; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 4.0
309; VI: v_or_b32
310; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <4.0, 4.0>; the sum is stored through %out.
311define amdgpu_kernel void @add_inline_imm_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
312  %y = fadd <2 x half> %x, <half 4.0, half 4.0>
313  store <2 x half> %y, ptr addrspace(1) %out
314  ret void
315}
316
317; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
318; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
319; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
320; GFX10: buffer_store_{{dword|b32}} [[REG]]
321
322; GFX9: s_load_dword [[VAL:s[0-9]+]]
323; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
324; GFX9: buffer_store_dword [[REG]]
325
326; FIXME: Shouldn't need right shift and SDWA, also extra copy
327; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
328; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
329; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
330; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
331
332; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
333; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -4.0
334; VI: v_or_b32
335; VI: buffer_store_dword
; Kernel body: one <2 x half> fadd of the by-value kernel argument %x with the
; splat constant <-4.0, -4.0>; the sum is stored through %out.
336define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
337  %y = fadd <2 x half> %x, <half -4.0, half -4.0>
338  store <2 x half> %y, ptr addrspace(1) %out
339  ret void
340}
341
342; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
343; GFX10: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]]
344; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
345; GFX10: buffer_store_{{dword|b32}} [[REG]]
346
347; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
348; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
349; GFX9: buffer_store_dword [[REG]]
350
351; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
352; VI-DAG: buffer_load_dword
353; VI-NOT: and
354; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
355; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
356; VI: v_or_b32
357; VI: buffer_store_dword
; Kernel body: unlike the add_inline_imm_* tests, %x is loaded from memory
; (addrspace(1) pointer %in) rather than passed by value, so the checks above
; see it in a vector register. It is then added to the splat <0.5, 0.5> and
; the sum is stored through %out.
358define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
359  %x = load <2 x half>, ptr addrspace(1) %in
360  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
361  store <2 x half> %y, ptr addrspace(1) %out
362  ret void
363}
364
365; GCN-LABEL: {{^}}commute_add_literal_v2f16:
366; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00]
367
368; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
369; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
370; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x00,0x01,0x00,0x08]
371; GFX9: buffer_store_dword [[REG]]
372
373; VI-DAG: buffer_load_dword
374; VI-NOT: and
375; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0x6400, v{{[0-9]+}}
376; gfx8 does not support sreg or imm in sdwa - the constant will be moved into a VGPR first
377; VI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x6400
378; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
379; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
380; VI: buffer_store_dword
; Kernel body: loads <2 x half> %x from the addrspace(1) pointer %in, adds the
; splat constant <1024.0, 1024.0> (checked above as the 16-bit literal 0x6400)
; and stores the sum through %out.
381define amdgpu_kernel void @commute_add_literal_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
382  %x = load <2 x half>, ptr addrspace(1) %in
383  %y = fadd <2 x half> %x, <half 1024.0, half 1024.0>
384  store <2 x half> %y, ptr addrspace(1) %out
385  ret void
386}
387
388; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
389; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
390; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
391; GFX10: buffer_store_{{dword|b32}} [[REG]]
392
393; GFX9: s_load_dword [[VAL:s[0-9]+]]
394; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
395; GFX9: buffer_store_dword [[REG]]
396
397; FIXME: Shouldn't need right shift and SDWA, also extra copy
398; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
399; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 ; encoding
400; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
401; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
402
403; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
404; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1 ; encoding
405; VI: v_or_b32
406; VI: buffer_store_dword
; Kernel body: fadd of the kernel argument %x with a splat whose lanes are the
; raw half bit pattern 0xH0001; the checks above expect that operand to be
; printed as the integer 1. The sum is stored through %out.
407define amdgpu_kernel void @add_inline_imm_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
408  %y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
409  store <2 x half> %y, ptr addrspace(1) %out
410  ret void
411}
412
413; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
414; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
415; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
416; GFX10: buffer_store_{{dword|b32}} [[REG]]
417
418; GFX9: s_load_dword [[VAL:s[0-9]+]]
419; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
420; GFX9: buffer_store_dword [[REG]]
421
422
423; FIXME: Shouldn't need right shift and SDWA, also extra copy
424; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
425; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 ; encoding
426; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
427; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
428
429; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
430; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2 ; encoding
431; VI: v_or_b32
432; VI: buffer_store_dword
; Kernel body: fadd of the kernel argument %x with a splat whose lanes are the
; raw half bit pattern 0xH0002; the checks above expect that operand to be
; printed as the integer 2. The sum is stored through %out.
433define amdgpu_kernel void @add_inline_imm_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
434  %y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
435  store <2 x half> %y, ptr addrspace(1) %out
436  ret void
437}
438
439; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
440; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
441; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
442; GFX10: buffer_store_{{dword|b32}} [[REG]]
443
444; GFX9: s_load_dword [[VAL:s[0-9]+]]
445; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
446; GFX9: buffer_store_dword [[REG]]
447
448
449; FIXME: Shouldn't need right shift and SDWA, also extra copy
450; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
451; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 ; encoding
452; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
453; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
454
455; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
456; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16 ; encoding
457; VI: v_or_b32
458; VI: buffer_store_dword
; Kernel body: fadd of the kernel argument %x with a splat whose lanes are the
; raw half bit pattern 0xH0010; the checks above expect that operand to be
; printed as the integer 16. The sum is stored through %out.
459define amdgpu_kernel void @add_inline_imm_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
460  %y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
461  store <2 x half> %y, ptr addrspace(1) %out
462  ret void
463}
464
465; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
466; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1
467; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
468; GFX10: buffer_store_{{dword|b32}} [[REG]]
469
; The gfx9 source register is matched with a wildcard, like the gfx10 line
; above, so the check does not depend on a particular register assignment.
470; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1
471; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
472; GFX9: buffer_store_dword [[REG]]
473
474; VI: s_load_dword [[VAL:s[0-9]+]]
475; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1 ; encoding
476; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
477; VI: buffer_store_dword [[REG]]
; Kernel body: despite the _v2f16 name no floating-point operation is used.
; %x is bitcast to i32, -1 is added as a 32-bit integer, and the result is
; bitcast back to <2 x half> before being stored through %out.
478define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
479  %xbc = bitcast <2 x half> %x to i32
480  %y = add i32 %xbc, -1
481  %ybc = bitcast i32 %y to <2 x half>
482  store <2 x half> %ybc, ptr addrspace(1) %out
483  ret void
484}
485
486; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
487; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
488; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
489; GFX10: buffer_store_{{dword|b32}} [[REG]]
490
; The gfx9 source register is matched with a wildcard, like the gfx10 line
; above, so the check does not depend on a particular register assignment.
491; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
492; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
493; GFX9: buffer_store_dword [[REG]]
494
495; VI: s_load_dword [[VAL:s[0-9]+]]
496; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe ; encoding
497; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
498; VI: buffer_store_dword [[REG]]
; Kernel body: despite the _v2f16 name no floating-point operation is used.
; %x is bitcast to i32, the 32-bit constant 0xfffefffe (0xfffe in each 16-bit
; half) is added as an integer, and the result is bitcast back to <2 x half>
; before being stored through %out.
499define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
500  %xbc = bitcast <2 x half> %x to i32
501  %y = add i32 %xbc, 4294901758 ; 0xfffefffe
502  %ybc = bitcast i32 %y to <2 x half>
503  store <2 x half> %ybc, ptr addrspace(1) %out
504  ret void
505}
506
507; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
508; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
509; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
510; GFX10: buffer_store_{{dword|b32}} [[REG]]
511
; The gfx9 source register is matched with a wildcard, like the gfx10 line
; above, so the check does not depend on a particular register assignment.
512; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
513; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
514; GFX9: buffer_store_dword [[REG]]
515
516
517; VI: s_load_dword [[VAL:s[0-9]+]]
518; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0 ; encoding
519; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
520; VI: buffer_store_dword [[REG]]
; Kernel body: despite the _v2f16 name no floating-point operation is used.
; %x is bitcast to i32, the 32-bit constant 0xfff0fff0 (0xfff0 in each 16-bit
; half) is added as an integer, and the result is bitcast back to <2 x half>
; before being stored through %out.
521define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
522  %xbc = bitcast <2 x half> %x to i32
523  %y = add i32 %xbc, 4293984240 ; 0xfff0fff0
524  %ybc = bitcast i32 %y to <2 x half>
525  store <2 x half> %ybc, ptr addrspace(1) %out
526  ret void
527}
528
529; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
530; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
531; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
532; GFX10: buffer_store_{{dword|b32}} [[REG]]
533
534; GFX9: s_load_dword [[VAL:s[0-9]+]]
535; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
536; GFX9: buffer_store_dword [[REG]]
537
538; FIXME: Shouldn't need right shift and SDWA, also extra copy
539; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
540; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
541; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
542; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
543
544; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
545; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 63
546; VI: v_or_b32
547; VI: buffer_store_dword
; Kernel body: fadd of the kernel argument %x with a splat whose lanes are the
; raw half bit pattern 0xH003F; the checks above expect that operand to be
; printed as the integer 63. The sum is stored through %out.
548define amdgpu_kernel void @add_inline_imm_63_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
549  %y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
550  store <2 x half> %y, ptr addrspace(1) %out
551  ret void
552}
553
554; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
555; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
556; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
557; GFX10: buffer_store_{{dword|b32}} [[REG]]
558
559; GFX9: s_load_dword [[VAL:s[0-9]+]]
560; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
561; GFX9: buffer_store_dword [[REG]]
562
563; FIXME: Shouldn't need right shift and SDWA, also extra copy
564; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
565; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
566; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
567; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
568
569; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
570; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 64
571; VI: v_or_b32
572; VI: buffer_store_dword
; Kernel body: fadd of the kernel argument %x with a splat whose lanes are the
; raw half bit pattern 0xH0040; the checks above expect that operand to be
; printed as the integer 64. The sum is stored through %out.
573define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
574  %y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
575  store <2 x half> %y, ptr addrspace(1) %out
576  ret void
577}
578
579; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582
583; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half 0.5 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
584define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586  ret <2 x i16> %y
587}
588
589; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592
593; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half -0.5 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
594define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596  ret <2 x i16> %y
597}
598
599; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602
603; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half 1.0 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
604define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606  ret <2 x i16> %y
607}
608
609; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612
613; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half -1.0 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
614define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616  ret <2 x i16> %y
617}
618
619; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
620; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1]
621
622; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xe9,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
; Ordinary (non-kernel) function: the constant is the value being shifted and
; %x is the per-lane shift amount. Each lane of the i16 bit pattern of half
; 2.0 is shifted left by the matching lane of %x; the result is returned.
623define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
624  %y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
625  ret <2 x i16> %y
626}
627
628; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
629; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1]
630
631; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xeb,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
; Ordinary (non-kernel) function: the constant is the value being shifted and
; %x is the per-lane shift amount. Each lane of the i16 bit pattern of half
; -2.0 is shifted left by the matching lane of %x; the result is returned.
632define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
633  %y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
634  ret <2 x i16> %y
635}
636
637; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640
641; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half 4.0 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
642define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644  ret <2 x i16> %y
646}
647
648; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651
652; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern of half -4.0 (supplied through a constant bitcast) and
; returns the product. The operation is an integer mul, not a float multiply.
653define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655  ret <2 x i16> %y
656}
657
658; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661
662; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
; Ordinary (non-kernel) function: multiplies <2 x i16> %x lane-wise by the
; i16 bit pattern 0x3118 (written as half 0xH3118 through a constant bitcast;
; the test name refers to it as inv2pi) and returns the product.
663define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665  ret <2 x i16> %y
666}
667
668attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
669