xref: /llvm-project/llvm/test/CodeGen/AMDGPU/imm16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
4; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
5; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
6
7; FIXME: Merge into imm.ll
8
; Volatile store of the i16 constant -32768 (bit pattern 0x8000) to the
; addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffff8000 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0x8000.
; Unlike the non-volatile half stores in this file, every target's expected
; output has a wait after the store (s_waitcnt_vscnt null, 0x0 on GFX10/GFX11,
; s_waitcnt vmcnt(0) on VI/SI), and the GFX11 store is printed with "dlc".
; The check lines are autogenerated; regenerate them rather than hand-editing.
9define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) {
10; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
11; GFX10:       ; %bb.0:
12; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
13; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
14; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
15; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
16; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
17; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
18; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
19; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
20;
21; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
22; GFX11:       ; %bb.0:
23; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
24; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
25; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
26; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
27; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
28; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
29; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
30; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
31;
32; VI-LABEL: store_inline_imm_neg_0.0_i16:
33; VI:       ; %bb.0:
34; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
35; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
36; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
37; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
38; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
39; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
40; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
41; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
42;
43; SI-LABEL: store_inline_imm_neg_0.0_i16:
44; SI:       ; %bb.0:
45; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
46; SI-NEXT:    s_mov_b32 s3, 0xf000
47; SI-NEXT:    s_mov_b32 s2, -1
48; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
49; SI-NEXT:    s_waitcnt lgkmcnt(0)
50; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
51; SI-NEXT:    s_waitcnt vmcnt(0)
52; SI-NEXT:    s_endpgm
53  store volatile i16 -32768, ptr addrspace(1) %out
54  ret void
55}
56
; Stores half 0.0 to the addrspace(1) pointer %out.
; Every target's checks expect the value to be materialized as
; "v_mov_b32_e32 v0, 0"; where encodings are shown it is the 4-byte form
; with no trailing literal dword.
; The check lines are autogenerated; regenerate them rather than hand-editing.
57define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) {
58; GFX10-LABEL: store_inline_imm_0.0_f16:
59; GFX10:       ; %bb.0:
60; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
61; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
62; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
63; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
64; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
65; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
66; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
67;
68; GFX11-LABEL: store_inline_imm_0.0_f16:
69; GFX11:       ; %bb.0:
70; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
71; GFX11-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
72; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
73; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
74; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
75; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
76; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
77;
78; VI-LABEL: store_inline_imm_0.0_f16:
79; VI:       ; %bb.0:
80; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
81; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
82; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
83; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
84; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
85; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
86; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
87;
88; SI-LABEL: store_inline_imm_0.0_f16:
89; SI:       ; %bb.0:
90; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
91; SI-NEXT:    s_mov_b32 s3, 0xf000
92; SI-NEXT:    s_mov_b32 s2, -1
93; SI-NEXT:    v_mov_b32_e32 v0, 0
94; SI-NEXT:    s_waitcnt lgkmcnt(0)
95; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
96; SI-NEXT:    s_endpgm
97  store half 0.0, ptr addrspace(1) %out
98  ret void
99}
100
; Stores half -0.0 to the addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffff8000 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0x8000.
; The check lines are autogenerated; regenerate them rather than hand-editing.
101define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) {
102; GFX10-LABEL: store_imm_neg_0.0_f16:
103; GFX10:       ; %bb.0:
104; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
105; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
106; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
107; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
108; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
109; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
110; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
111;
112; GFX11-LABEL: store_imm_neg_0.0_f16:
113; GFX11:       ; %bb.0:
114; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
115; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
116; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
117; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
118; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
119; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
120; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
121;
122; VI-LABEL: store_imm_neg_0.0_f16:
123; VI:       ; %bb.0:
124; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
125; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
126; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
127; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
128; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
129; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
130; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
131;
132; SI-LABEL: store_imm_neg_0.0_f16:
133; SI:       ; %bb.0:
134; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
135; SI-NEXT:    s_mov_b32 s3, 0xf000
136; SI-NEXT:    s_mov_b32 s2, -1
137; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
138; SI-NEXT:    s_waitcnt lgkmcnt(0)
139; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
140; SI-NEXT:    s_endpgm
141  store half -0.0, ptr addrspace(1) %out
142  ret void
143}
144
; Stores half 0.5 to the addrspace(1) pointer %out.
; All four targets' checks expect "v_mov_b32_e32 v0, 0x3800". Where encodings
; are shown, the move is the 8-byte form with the value in the trailing four
; bytes, i.e. the expected output uses a 32-bit literal here.
; The check lines are autogenerated; regenerate them rather than hand-editing.
145define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) {
146; GFX10-LABEL: store_inline_imm_0.5_f16:
147; GFX10:       ; %bb.0:
148; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
149; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
150; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
151; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
152; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
153; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
154; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
155;
156; GFX11-LABEL: store_inline_imm_0.5_f16:
157; GFX11:       ; %bb.0:
158; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
159; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
160; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
161; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
162; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
163; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
164; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
165;
166; VI-LABEL: store_inline_imm_0.5_f16:
167; VI:       ; %bb.0:
168; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
169; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
170; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
171; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
172; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
173; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
174; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
175;
176; SI-LABEL: store_inline_imm_0.5_f16:
177; SI:       ; %bb.0:
178; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
179; SI-NEXT:    s_mov_b32 s3, 0xf000
180; SI-NEXT:    s_mov_b32 s2, -1
181; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
182; SI-NEXT:    s_waitcnt lgkmcnt(0)
183; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
184; SI-NEXT:    s_endpgm
185  store half 0.5, ptr addrspace(1) %out
186  ret void
187}
188
; Stores half -0.5 to the addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffffb800 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0xb800.
; The check lines are autogenerated; regenerate them rather than hand-editing.
189define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) {
190; GFX10-LABEL: store_inline_imm_m_0.5_f16:
191; GFX10:       ; %bb.0:
192; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
193; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
194; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
195; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
196; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
197; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
198; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
199;
200; GFX11-LABEL: store_inline_imm_m_0.5_f16:
201; GFX11:       ; %bb.0:
202; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
203; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
204; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
205; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
206; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
207; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
208; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
209;
210; VI-LABEL: store_inline_imm_m_0.5_f16:
211; VI:       ; %bb.0:
212; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
213; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
214; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
215; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
216; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
217; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
218; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
219;
220; SI-LABEL: store_inline_imm_m_0.5_f16:
221; SI:       ; %bb.0:
222; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
223; SI-NEXT:    s_mov_b32 s3, 0xf000
224; SI-NEXT:    s_mov_b32 s2, -1
225; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
226; SI-NEXT:    s_waitcnt lgkmcnt(0)
227; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
228; SI-NEXT:    s_endpgm
229  store half -0.5, ptr addrspace(1) %out
230  ret void
231}
232
; Stores half 1.0 to the addrspace(1) pointer %out.
; All four targets' checks expect "v_mov_b32_e32 v0, 0x3c00". Where encodings
; are shown, the move is the 8-byte form with the value in the trailing four
; bytes, i.e. the expected output uses a 32-bit literal here.
; The check lines are autogenerated; regenerate them rather than hand-editing.
233define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) {
234; GFX10-LABEL: store_inline_imm_1.0_f16:
235; GFX10:       ; %bb.0:
236; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
237; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
238; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
239; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
240; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
241; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
242; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
243;
244; GFX11-LABEL: store_inline_imm_1.0_f16:
245; GFX11:       ; %bb.0:
246; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
247; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
248; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
249; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
250; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
251; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
252; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
253;
254; VI-LABEL: store_inline_imm_1.0_f16:
255; VI:       ; %bb.0:
256; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
257; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
258; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
259; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
260; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
261; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
262; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
263;
264; SI-LABEL: store_inline_imm_1.0_f16:
265; SI:       ; %bb.0:
266; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
267; SI-NEXT:    s_mov_b32 s3, 0xf000
268; SI-NEXT:    s_mov_b32 s2, -1
269; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
270; SI-NEXT:    s_waitcnt lgkmcnt(0)
271; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
272; SI-NEXT:    s_endpgm
273  store half 1.0, ptr addrspace(1) %out
274  ret void
275}
276
; Stores half -1.0 to the addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffffbc00 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0xbc00.
; The check lines are autogenerated; regenerate them rather than hand-editing.
277define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) {
278; GFX10-LABEL: store_inline_imm_m_1.0_f16:
279; GFX10:       ; %bb.0:
280; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
281; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
282; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
283; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
284; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
285; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
286; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
287;
288; GFX11-LABEL: store_inline_imm_m_1.0_f16:
289; GFX11:       ; %bb.0:
290; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
291; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
292; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
293; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
294; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
295; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
296; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
297;
298; VI-LABEL: store_inline_imm_m_1.0_f16:
299; VI:       ; %bb.0:
300; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
301; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
302; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
303; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
304; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
305; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
306; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
307;
308; SI-LABEL: store_inline_imm_m_1.0_f16:
309; SI:       ; %bb.0:
310; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
311; SI-NEXT:    s_mov_b32 s3, 0xf000
312; SI-NEXT:    s_mov_b32 s2, -1
313; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
314; SI-NEXT:    s_waitcnt lgkmcnt(0)
315; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
316; SI-NEXT:    s_endpgm
317  store half -1.0, ptr addrspace(1) %out
318  ret void
319}
320
; Stores half 2.0 to the addrspace(1) pointer %out.
; All four targets' checks expect "v_mov_b32_e32 v0, 0x4000". Where encodings
; are shown, the move is the 8-byte form with the value in the trailing four
; bytes, i.e. the expected output uses a 32-bit literal here.
; The check lines are autogenerated; regenerate them rather than hand-editing.
321define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) {
322; GFX10-LABEL: store_inline_imm_2.0_f16:
323; GFX10:       ; %bb.0:
324; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
325; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
326; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
327; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
328; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
329; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
330; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
331;
332; GFX11-LABEL: store_inline_imm_2.0_f16:
333; GFX11:       ; %bb.0:
334; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
335; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
336; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
337; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
338; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
339; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
340; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
341;
342; VI-LABEL: store_inline_imm_2.0_f16:
343; VI:       ; %bb.0:
344; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
345; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
346; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
347; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
348; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
349; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
350; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
351;
352; SI-LABEL: store_inline_imm_2.0_f16:
353; SI:       ; %bb.0:
354; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
355; SI-NEXT:    s_mov_b32 s3, 0xf000
356; SI-NEXT:    s_mov_b32 s2, -1
357; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
358; SI-NEXT:    s_waitcnt lgkmcnt(0)
359; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
360; SI-NEXT:    s_endpgm
361  store half 2.0, ptr addrspace(1) %out
362  ret void
363}
364
; Stores half -2.0 to the addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffffc000 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0xc000.
; The check lines are autogenerated; regenerate them rather than hand-editing.
365define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) {
366; GFX10-LABEL: store_inline_imm_m_2.0_f16:
367; GFX10:       ; %bb.0:
368; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
369; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
370; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
371; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
372; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
373; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
374; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
375;
376; GFX11-LABEL: store_inline_imm_m_2.0_f16:
377; GFX11:       ; %bb.0:
378; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
379; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
380; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
381; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
382; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
383; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
384; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
385;
386; VI-LABEL: store_inline_imm_m_2.0_f16:
387; VI:       ; %bb.0:
388; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
389; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
390; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
391; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
392; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
393; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
394; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
395;
396; SI-LABEL: store_inline_imm_m_2.0_f16:
397; SI:       ; %bb.0:
398; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
399; SI-NEXT:    s_mov_b32 s3, 0xf000
400; SI-NEXT:    s_mov_b32 s2, -1
401; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
402; SI-NEXT:    s_waitcnt lgkmcnt(0)
403; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
404; SI-NEXT:    s_endpgm
405  store half -2.0, ptr addrspace(1) %out
406  ret void
407}
408
; Stores half 4.0 to the addrspace(1) pointer %out.
; All four targets' checks expect "v_mov_b32_e32 v0, 0x4400". Where encodings
; are shown, the move is the 8-byte form with the value in the trailing four
; bytes, i.e. the expected output uses a 32-bit literal here.
; The check lines are autogenerated; regenerate them rather than hand-editing.
409define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) {
410; GFX10-LABEL: store_inline_imm_4.0_f16:
411; GFX10:       ; %bb.0:
412; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
413; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
414; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
415; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
416; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
417; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
418; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
419;
420; GFX11-LABEL: store_inline_imm_4.0_f16:
421; GFX11:       ; %bb.0:
422; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
423; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
424; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
425; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
426; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
427; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
428; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
429;
430; VI-LABEL: store_inline_imm_4.0_f16:
431; VI:       ; %bb.0:
432; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
433; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
434; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
435; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
436; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
437; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
438; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
439;
440; SI-LABEL: store_inline_imm_4.0_f16:
441; SI:       ; %bb.0:
442; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
443; SI-NEXT:    s_mov_b32 s3, 0xf000
444; SI-NEXT:    s_mov_b32 s2, -1
445; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
446; SI-NEXT:    s_waitcnt lgkmcnt(0)
447; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
448; SI-NEXT:    s_endpgm
449  store half 4.0, ptr addrspace(1) %out
450  ret void
451}
452
; Stores half -4.0 to the addrspace(1) pointer %out.
; The GFX10/GFX11/VI checks expect the value as the sign-extended 32-bit
; literal 0xffffc400 (8-byte v_mov_b32 encoding, value in the trailing four
; bytes); the SI checks expect 0xc400.
; The check lines are autogenerated; regenerate them rather than hand-editing.
453define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) {
454; GFX10-LABEL: store_inline_imm_m_4.0_f16:
455; GFX10:       ; %bb.0:
456; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
457; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
458; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
459; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
460; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
461; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
462; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
463;
464; GFX11-LABEL: store_inline_imm_m_4.0_f16:
465; GFX11:       ; %bb.0:
466; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
467; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
468; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
469; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
470; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
471; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
472; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
473;
474; VI-LABEL: store_inline_imm_m_4.0_f16:
475; VI:       ; %bb.0:
476; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
477; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
478; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
479; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
480; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
481; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
482; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
483;
484; SI-LABEL: store_inline_imm_m_4.0_f16:
485; SI:       ; %bb.0:
486; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
487; SI-NEXT:    s_mov_b32 s3, 0xf000
488; SI-NEXT:    s_mov_b32 s2, -1
489; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
490; SI-NEXT:    s_waitcnt lgkmcnt(0)
491; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
492; SI-NEXT:    s_endpgm
493  store half -4.0, ptr addrspace(1) %out
494  ret void
495}
496
; Stores the half constant with bit pattern 0xH3118 to the addrspace(1)
; pointer %out (per the function name this is presumably 1/(2*pi) in half
; precision -- not verifiable from this file alone).
; All four targets' checks expect "v_mov_b32_e32 v0, 0x3118". Where encodings
; are shown, the move is the 8-byte form with the value in the trailing four
; bytes, i.e. the expected output uses a 32-bit literal here.
; The check lines are autogenerated; regenerate them rather than hand-editing.
497define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) {
498; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
499; GFX10:       ; %bb.0:
500; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
501; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
502; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
503; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
504; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
505; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
506; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
507;
508; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
509; GFX11:       ; %bb.0:
510; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
511; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
512; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
513; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
514; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
515; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
516; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
517;
518; VI-LABEL: store_inline_imm_inv_2pi_f16:
519; VI:       ; %bb.0:
520; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
521; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
522; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
523; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
524; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
525; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
526; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
527;
528; SI-LABEL: store_inline_imm_inv_2pi_f16:
529; SI:       ; %bb.0:
530; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
531; SI-NEXT:    s_mov_b32 s3, 0xf000
532; SI-NEXT:    s_mov_b32 s2, -1
533; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
534; SI-NEXT:    s_waitcnt lgkmcnt(0)
535; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
536; SI-NEXT:    s_endpgm
537  store half 0xH3118, ptr addrspace(1) %out
538  ret void
539}
540
; Stores the f16 bit pattern 0xB118 (roughly -0.159, i.e. -1/(2*pi) as the
; test name suggests) to %out. All four runs are expected to materialize the
; value with a v_mov_b32 literal (0xffffb118 on gfx1010, gfx1100 and tonga,
; 0xb118 on tahiti) and then write it with a 16-bit buffer store.
541define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out) {
542; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
543; GFX10:       ; %bb.0:
544; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
545; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
546; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
547; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
548; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
549; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
550; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
551;
552; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
553; GFX11:       ; %bb.0:
554; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
555; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
556; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
557; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
558; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
559; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
560; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
561;
562; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
563; VI:       ; %bb.0:
564; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
565; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
566; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
567; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
568; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
569; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
570; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
571;
572; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
573; SI:       ; %bb.0:
574; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
575; SI-NEXT:    s_mov_b32 s3, 0xf000
576; SI-NEXT:    s_mov_b32 s2, -1
577; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
578; SI-NEXT:    s_waitcnt lgkmcnt(0)
579; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
580; SI-NEXT:    s_endpgm
581  store half 0xHB118, ptr addrspace(1) %out
582  ret void
583}
584
; Stores the f16 constant 4096.0 (bit pattern 0x6C00) to %out. Every run is
; expected to load the value into a VGPR with a v_mov_b32 carrying the literal
; 0x6c00 and then write it with a 16-bit buffer store.
585define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) {
586; GFX10-LABEL: store_literal_imm_f16:
587; GFX10:       ; %bb.0:
588; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
589; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
590; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
591; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
592; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
593; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
594; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
595;
596; GFX11-LABEL: store_literal_imm_f16:
597; GFX11:       ; %bb.0:
598; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
599; GFX11-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
600; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
601; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
602; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
603; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
604; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
605;
606; VI-LABEL: store_literal_imm_f16:
607; VI:       ; %bb.0:
608; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
609; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
610; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
611; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
612; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
613; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
614; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
615;
616; SI-LABEL: store_literal_imm_f16:
617; SI:       ; %bb.0:
618; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
619; SI-NEXT:    s_mov_b32 s3, 0xf000
620; SI-NEXT:    s_mov_b32 s2, -1
621; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
622; SI-NEXT:    s_waitcnt lgkmcnt(0)
623; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
624; SI-NEXT:    s_endpgm
625  store half 4096.0, ptr addrspace(1) %out
626  ret void
627}
628
; Adds the f16 constant 0.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 whose
; second source is printed as 0; the tahiti checks expect %x to be converted
; to f32, added with v_add_f32 and converted back to f16 before the store.
629define amdgpu_kernel void @add_inline_imm_0.0_f16(ptr addrspace(1) %out, half %x) {
630; GFX10-LABEL: add_inline_imm_0.0_f16:
631; GFX10:       ; %bb.0:
632; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
633; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
634; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
635; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
636; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
637; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
638; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
639; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
640; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
641;
642; GFX11-LABEL: add_inline_imm_0.0_f16:
643; GFX11:       ; %bb.0:
644; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
645; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
646; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
647; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
648; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
649; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
650; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
651; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
652; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
653;
654; VI-LABEL: add_inline_imm_0.0_f16:
655; VI:       ; %bb.0:
656; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
657; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
658; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
659; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
660; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
661; VI-NEXT:    v_add_f16_e64 v0, s4, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x00,0x01,0x00]
662; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
663; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
664;
665; SI-LABEL: add_inline_imm_0.0_f16:
666; SI:       ; %bb.0:
667; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
668; SI-NEXT:    s_mov_b32 s3, 0xf000
669; SI-NEXT:    s_mov_b32 s2, -1
670; SI-NEXT:    s_waitcnt lgkmcnt(0)
671; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
672; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
673; SI-NEXT:    v_add_f32_e32 v0, 0, v0
674; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
675; SI-NEXT:    s_waitcnt lgkmcnt(0)
676; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
677; SI-NEXT:    s_endpgm
678  %y = fadd half %x, 0.0
679  store half %y, ptr addrspace(1) %out
680  ret void
681}
682
; Adds the f16 constant 0.5 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect 0.5 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
683define amdgpu_kernel void @add_inline_imm_0.5_f16(ptr addrspace(1) %out, half %x) {
684; GFX10-LABEL: add_inline_imm_0.5_f16:
685; GFX10:       ; %bb.0:
686; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
687; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
688; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
689; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
690; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
691; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
692; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
693; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
694; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
695;
696; GFX11-LABEL: add_inline_imm_0.5_f16:
697; GFX11:       ; %bb.0:
698; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
699; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
700; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
701; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
702; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
703; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
704; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
705; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
706; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
707;
708; VI-LABEL: add_inline_imm_0.5_f16:
709; VI:       ; %bb.0:
710; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
711; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
712; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
713; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
714; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
715; VI-NEXT:    v_add_f16_e64 v0, s4, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe0,0x01,0x00]
716; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
717; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
718;
719; SI-LABEL: add_inline_imm_0.5_f16:
720; SI:       ; %bb.0:
721; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
722; SI-NEXT:    s_mov_b32 s3, 0xf000
723; SI-NEXT:    s_mov_b32 s2, -1
724; SI-NEXT:    s_waitcnt lgkmcnt(0)
725; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
726; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
727; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
728; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
729; SI-NEXT:    s_waitcnt lgkmcnt(0)
730; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
731; SI-NEXT:    s_endpgm
732  %y = fadd half %x, 0.5
733  store half %y, ptr addrspace(1) %out
734  ret void
735}
736
; Adds the f16 constant -0.5 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect -0.5 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
737define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(ptr addrspace(1) %out, half %x) {
738; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
739; GFX10:       ; %bb.0:
740; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
741; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
742; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
743; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
744; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
745; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
746; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
747; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
748; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
749;
750; GFX11-LABEL: add_inline_imm_neg_0.5_f16:
751; GFX11:       ; %bb.0:
752; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
753; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
754; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
755; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
756; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
757; GFX11-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
758; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
759; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
760; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
761;
762; VI-LABEL: add_inline_imm_neg_0.5_f16:
763; VI:       ; %bb.0:
764; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
765; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
766; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
767; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
768; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
769; VI-NEXT:    v_add_f16_e64 v0, s4, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe2,0x01,0x00]
770; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
771; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
772;
773; SI-LABEL: add_inline_imm_neg_0.5_f16:
774; SI:       ; %bb.0:
775; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
776; SI-NEXT:    s_mov_b32 s3, 0xf000
777; SI-NEXT:    s_mov_b32 s2, -1
778; SI-NEXT:    s_waitcnt lgkmcnt(0)
779; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
780; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
781; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
782; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
783; SI-NEXT:    s_waitcnt lgkmcnt(0)
784; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
785; SI-NEXT:    s_endpgm
786  %y = fadd half %x, -0.5
787  store half %y, ptr addrspace(1) %out
788  ret void
789}
790
; Adds the f16 constant 1.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect 1.0 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
791define amdgpu_kernel void @add_inline_imm_1.0_f16(ptr addrspace(1) %out, half %x) {
792; GFX10-LABEL: add_inline_imm_1.0_f16:
793; GFX10:       ; %bb.0:
794; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
795; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
796; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
797; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
798; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
799; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
800; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
801; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
802; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
803;
804; GFX11-LABEL: add_inline_imm_1.0_f16:
805; GFX11:       ; %bb.0:
806; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
807; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
808; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
809; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
810; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
811; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
812; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
813; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
814; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
815;
816; VI-LABEL: add_inline_imm_1.0_f16:
817; VI:       ; %bb.0:
818; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
819; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
820; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
821; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
822; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
823; VI-NEXT:    v_add_f16_e64 v0, s4, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe4,0x01,0x00]
824; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
825; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
826;
827; SI-LABEL: add_inline_imm_1.0_f16:
828; SI:       ; %bb.0:
829; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
830; SI-NEXT:    s_mov_b32 s3, 0xf000
831; SI-NEXT:    s_mov_b32 s2, -1
832; SI-NEXT:    s_waitcnt lgkmcnt(0)
833; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
834; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
835; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
836; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
837; SI-NEXT:    s_waitcnt lgkmcnt(0)
838; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
839; SI-NEXT:    s_endpgm
840  %y = fadd half %x, 1.0
841  store half %y, ptr addrspace(1) %out
842  ret void
843}
844
; Adds the f16 constant -1.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect -1.0 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
845define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(ptr addrspace(1) %out, half %x) {
846; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
847; GFX10:       ; %bb.0:
848; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
849; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
850; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
851; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
852; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
853; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
854; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
855; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
856; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
857;
858; GFX11-LABEL: add_inline_imm_neg_1.0_f16:
859; GFX11:       ; %bb.0:
860; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
861; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
862; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
863; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
864; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
865; GFX11-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
866; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
867; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
868; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
869;
870; VI-LABEL: add_inline_imm_neg_1.0_f16:
871; VI:       ; %bb.0:
872; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
873; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
874; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
875; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
876; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
877; VI-NEXT:    v_add_f16_e64 v0, s4, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe6,0x01,0x00]
878; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
879; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
880;
881; SI-LABEL: add_inline_imm_neg_1.0_f16:
882; SI:       ; %bb.0:
883; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
884; SI-NEXT:    s_mov_b32 s3, 0xf000
885; SI-NEXT:    s_mov_b32 s2, -1
886; SI-NEXT:    s_waitcnt lgkmcnt(0)
887; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
888; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
889; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
890; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
891; SI-NEXT:    s_waitcnt lgkmcnt(0)
892; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
893; SI-NEXT:    s_endpgm
894  %y = fadd half %x, -1.0
895  store half %y, ptr addrspace(1) %out
896  ret void
897}
898
; Adds the f16 constant 2.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect 2.0 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
899define amdgpu_kernel void @add_inline_imm_2.0_f16(ptr addrspace(1) %out, half %x) {
900; GFX10-LABEL: add_inline_imm_2.0_f16:
901; GFX10:       ; %bb.0:
902; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
903; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
904; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
905; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
906; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
907; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
908; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
909; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
910; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
911;
912; GFX11-LABEL: add_inline_imm_2.0_f16:
913; GFX11:       ; %bb.0:
914; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
915; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
916; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
917; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
918; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
919; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
920; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
921; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
922; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
923;
924; VI-LABEL: add_inline_imm_2.0_f16:
925; VI:       ; %bb.0:
926; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
927; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
928; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
929; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
930; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
931; VI-NEXT:    v_add_f16_e64 v0, s4, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe8,0x01,0x00]
932; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
933; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
934;
935; SI-LABEL: add_inline_imm_2.0_f16:
936; SI:       ; %bb.0:
937; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
938; SI-NEXT:    s_mov_b32 s3, 0xf000
939; SI-NEXT:    s_mov_b32 s2, -1
940; SI-NEXT:    s_waitcnt lgkmcnt(0)
941; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
942; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
943; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
944; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
945; SI-NEXT:    s_waitcnt lgkmcnt(0)
946; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
947; SI-NEXT:    s_endpgm
948  %y = fadd half %x, 2.0
949  store half %y, ptr addrspace(1) %out
950  ret void
951}
952
; Adds the f16 constant -2.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect -2.0 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
953define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(ptr addrspace(1) %out, half %x) {
954; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
955; GFX10:       ; %bb.0:
956; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
957; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
958; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
959; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
960; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
961; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
962; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
963; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
964; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
965;
966; GFX11-LABEL: add_inline_imm_neg_2.0_f16:
967; GFX11:       ; %bb.0:
968; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
969; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
970; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
971; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
972; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
973; GFX11-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
974; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
975; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
976; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
977;
978; VI-LABEL: add_inline_imm_neg_2.0_f16:
979; VI:       ; %bb.0:
980; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
981; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
982; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
983; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
984; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
985; VI-NEXT:    v_add_f16_e64 v0, s4, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xea,0x01,0x00]
986; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
987; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
988;
989; SI-LABEL: add_inline_imm_neg_2.0_f16:
990; SI:       ; %bb.0:
991; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
992; SI-NEXT:    s_mov_b32 s3, 0xf000
993; SI-NEXT:    s_mov_b32 s2, -1
994; SI-NEXT:    s_waitcnt lgkmcnt(0)
995; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
996; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
997; SI-NEXT:    v_add_f32_e32 v0, -2.0, v0
998; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
999; SI-NEXT:    s_waitcnt lgkmcnt(0)
1000; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1001; SI-NEXT:    s_endpgm
1002  %y = fadd half %x, -2.0
1003  store half %y, ptr addrspace(1) %out
1004  ret void
1005}
1006
; Adds the f16 constant 4.0 to the kernel argument %x and stores the sum to
; %out. The gfx1010, gfx1100 and tonga checks expect 4.0 to appear directly
; as the second source of one v_add_f16_e64; the tahiti checks expect %x to
; be converted to f32, added with v_add_f32 and converted back to f16.
1007define amdgpu_kernel void @add_inline_imm_4.0_f16(ptr addrspace(1) %out, half %x) {
1008; GFX10-LABEL: add_inline_imm_4.0_f16:
1009; GFX10:       ; %bb.0:
1010; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1011; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1012; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1013; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1014; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1015; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1016; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1017; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1018; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1019;
1020; GFX11-LABEL: add_inline_imm_4.0_f16:
1021; GFX11:       ; %bb.0:
1022; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1023; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1024; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1025; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1026; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1027; GFX11-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1028; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1029; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1030; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1031;
1032; VI-LABEL: add_inline_imm_4.0_f16:
1033; VI:       ; %bb.0:
1034; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1035; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1036; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1037; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1038; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1039; VI-NEXT:    v_add_f16_e64 v0, s4, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xec,0x01,0x00]
1040; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1041; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1042;
1043; SI-LABEL: add_inline_imm_4.0_f16:
1044; SI:       ; %bb.0:
1045; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1046; SI-NEXT:    s_mov_b32 s3, 0xf000
1047; SI-NEXT:    s_mov_b32 s2, -1
1048; SI-NEXT:    s_waitcnt lgkmcnt(0)
1049; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1050; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1051; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
1052; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1053; SI-NEXT:    s_waitcnt lgkmcnt(0)
1054; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1055; SI-NEXT:    s_endpgm
1056  %y = fadd half %x, 4.0
1057  store half %y, ptr addrspace(1) %out
1058  ret void
1059}
1060
; Adds the half constant -4.0 to the kernel argument %x and stores the sum to
; %out. The autogenerated checks below show the constant emitted directly as
; the -4.0 source operand of v_add_f16_e64 on the GFX10, GFX11 and VI runs.
; The tahiti run instead converts %x to f32, adds -4.0 with v_add_f32_e32 and
; converts the result back to f16 before the store.
1061define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(ptr addrspace(1) %out, half %x) {
1062; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
1063; GFX10:       ; %bb.0:
1064; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1065; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1066; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1067; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1068; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1069; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1070; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1071; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1072; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1073;
1074; GFX11-LABEL: add_inline_imm_neg_4.0_f16:
1075; GFX11:       ; %bb.0:
1076; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1077; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1078; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1079; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1080; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1081; GFX11-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1082; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1083; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1084; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1085;
1086; VI-LABEL: add_inline_imm_neg_4.0_f16:
1087; VI:       ; %bb.0:
1088; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1089; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1090; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1091; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1092; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1093; VI-NEXT:    v_add_f16_e64 v0, s4, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xee,0x01,0x00]
1094; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1095; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1096;
1097; SI-LABEL: add_inline_imm_neg_4.0_f16:
1098; SI:       ; %bb.0:
1099; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1100; SI-NEXT:    s_mov_b32 s3, 0xf000
1101; SI-NEXT:    s_mov_b32 s2, -1
1102; SI-NEXT:    s_waitcnt lgkmcnt(0)
1103; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1104; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1105; SI-NEXT:    v_add_f32_e32 v0, -4.0, v0
1106; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1107; SI-NEXT:    s_waitcnt lgkmcnt(0)
1108; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1109; SI-NEXT:    s_endpgm
1110  %y = fadd half %x, -4.0
1111  store half %y, ptr addrspace(1) %out
1112  ret void
1113}
1114
; Loads a half from %in, adds the constant 0.5 and stores the sum to %out.
; In the IR the constant is the second fadd operand; in the checked output it
; is the first source operand of the add. On the GFX10, GFX11 and VI runs that
; is v_add_f16_e32 v0, 0.5, v0 with a 4-byte encoding; on the tahiti run it is
; v_add_f32_e32 v0, 0.5, v0 placed between an f16->f32 and an f32->f16
; conversion.
1115define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1116; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
1117; GFX10:       ; %bb.0:
1118; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1119; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1120; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1121; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1122; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1123; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1124; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1125; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1126; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1127; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1128; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1129; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1130; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1131; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1132; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1133;
1134; GFX11-LABEL: commute_add_inline_imm_0.5_f16:
1135; GFX11:       ; %bb.0:
1136; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1137; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1138; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1139; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1140; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1141; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1142; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1143; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1144; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1145; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1146; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1147; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1148; GFX11-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1149; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1150; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1151;
1152; VI-LABEL: commute_add_inline_imm_0.5_f16:
1153; VI:       ; %bb.0:
1154; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1155; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1156; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1157; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1158; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1159; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1160; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1161; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1162; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1163; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1164; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1165; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1166; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
1167; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1168; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1169;
1170; SI-LABEL: commute_add_inline_imm_0.5_f16:
1171; SI:       ; %bb.0:
1172; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1173; SI-NEXT:    s_mov_b32 s7, 0xf000
1174; SI-NEXT:    s_mov_b32 s6, -1
1175; SI-NEXT:    s_mov_b32 s10, s6
1176; SI-NEXT:    s_mov_b32 s11, s7
1177; SI-NEXT:    s_waitcnt lgkmcnt(0)
1178; SI-NEXT:    s_mov_b32 s8, s2
1179; SI-NEXT:    s_mov_b32 s9, s3
1180; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1181; SI-NEXT:    s_mov_b32 s4, s0
1182; SI-NEXT:    s_mov_b32 s5, s1
1183; SI-NEXT:    s_waitcnt vmcnt(0)
1184; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1185; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
1186; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1187; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1188; SI-NEXT:    s_endpgm
1189  %x = load half, ptr addrspace(1) %in
1190  %y = fadd half %x, 0.5
1191  store half %y, ptr addrspace(1) %out
1192  ret void
1193}
1194
; Loads a half from %in, adds the constant 1024.0 and stores the sum to %out.
; The checks show the constant emitted as a literal in the first source
; operand of the add. On the GFX10, GFX11 and VI runs v_add_f16_e32 takes
; 0x6400 and its encoding is 8 bytes, the trailing four bytes holding that
; value. On the tahiti run the add is done in f32 with the literal 0x44800000,
; placed between an f16->f32 and an f32->f16 conversion.
1195define amdgpu_kernel void @commute_add_literal_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1196; GFX10-LABEL: commute_add_literal_f16:
1197; GFX10:       ; %bb.0:
1198; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1199; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1200; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1201; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1202; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1203; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1204; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1205; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1206; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1207; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1208; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1209; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1210; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1211; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1212; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1213;
1214; GFX11-LABEL: commute_add_literal_f16:
1215; GFX11:       ; %bb.0:
1216; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1217; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1218; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1219; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1220; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1221; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1222; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1223; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1224; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1225; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1226; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1227; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1228; GFX11-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1229; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1230; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1231;
1232; VI-LABEL: commute_add_literal_f16:
1233; VI:       ; %bb.0:
1234; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1235; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1236; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1237; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1238; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1239; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1240; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1241; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1242; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1243; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1244; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1245; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1246; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
1247; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1248; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1249;
1250; SI-LABEL: commute_add_literal_f16:
1251; SI:       ; %bb.0:
1252; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1253; SI-NEXT:    s_mov_b32 s7, 0xf000
1254; SI-NEXT:    s_mov_b32 s6, -1
1255; SI-NEXT:    s_mov_b32 s10, s6
1256; SI-NEXT:    s_mov_b32 s11, s7
1257; SI-NEXT:    s_waitcnt lgkmcnt(0)
1258; SI-NEXT:    s_mov_b32 s8, s2
1259; SI-NEXT:    s_mov_b32 s9, s3
1260; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1261; SI-NEXT:    s_mov_b32 s4, s0
1262; SI-NEXT:    s_mov_b32 s5, s1
1263; SI-NEXT:    s_waitcnt vmcnt(0)
1264; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1265; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
1266; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1267; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1268; SI-NEXT:    s_endpgm
1269  %x = load half, ptr addrspace(1) %in
1270  %y = fadd half %x, 1024.0
1271  store half %y, ptr addrspace(1) %out
1272  ret void
1273}
1274
; Adds 0xH0001 to the kernel argument %x and stores the sum to %out. 0xH0001
; is the half whose 16-bit pattern equals the integer 1, not the value 1.0.
; On the GFX10, GFX11 and VI runs the checks show that constant printed as the
; operand 1 of v_add_f16_e64. On the tahiti run %x is converted to f32, the
; add uses the f32 literal 0x33800000, and the result is converted back to
; f16 before the store.
1275define amdgpu_kernel void @add_inline_imm_1_f16(ptr addrspace(1) %out, half %x) {
1276; GFX10-LABEL: add_inline_imm_1_f16:
1277; GFX10:       ; %bb.0:
1278; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1279; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1280; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1281; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1282; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1283; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1284; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1285; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1286; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1287;
1288; GFX11-LABEL: add_inline_imm_1_f16:
1289; GFX11:       ; %bb.0:
1290; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1291; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1292; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1293; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1294; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1295; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1296; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1297; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1298; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1299;
1300; VI-LABEL: add_inline_imm_1_f16:
1301; VI:       ; %bb.0:
1302; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1303; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1304; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1305; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1306; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1307; VI-NEXT:    v_add_f16_e64 v0, s4, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x02,0x01,0x00]
1308; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1309; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1310;
1311; SI-LABEL: add_inline_imm_1_f16:
1312; SI:       ; %bb.0:
1313; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1314; SI-NEXT:    s_mov_b32 s3, 0xf000
1315; SI-NEXT:    s_mov_b32 s2, -1
1316; SI-NEXT:    s_waitcnt lgkmcnt(0)
1317; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1318; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1319; SI-NEXT:    v_add_f32_e32 v0, 0x33800000, v0
1320; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1321; SI-NEXT:    s_waitcnt lgkmcnt(0)
1322; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1323; SI-NEXT:    s_endpgm
1324  %y = fadd half %x, 0xH0001
1325  store half %y, ptr addrspace(1) %out
1326  ret void
1327}
1328
; Adds 0xH0002 to the kernel argument %x and stores the sum to %out. 0xH0002
; is the half whose 16-bit pattern equals the integer 2, not the value 2.0.
; On the GFX10, GFX11 and VI runs the checks show that constant printed as the
; operand 2 of v_add_f16_e64. On the tahiti run %x is converted to f32, the
; add uses the f32 literal 0x34000000, and the result is converted back to
; f16 before the store.
1329define amdgpu_kernel void @add_inline_imm_2_f16(ptr addrspace(1) %out, half %x) {
1330; GFX10-LABEL: add_inline_imm_2_f16:
1331; GFX10:       ; %bb.0:
1332; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1333; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1334; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1335; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1336; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1337; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1338; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1339; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1340; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1341;
1342; GFX11-LABEL: add_inline_imm_2_f16:
1343; GFX11:       ; %bb.0:
1344; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1345; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1346; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1347; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1348; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1349; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1350; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1351; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1352; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1353;
1354; VI-LABEL: add_inline_imm_2_f16:
1355; VI:       ; %bb.0:
1356; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1357; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1358; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1359; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1360; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1361; VI-NEXT:    v_add_f16_e64 v0, s4, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x04,0x01,0x00]
1362; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1363; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1364;
1365; SI-LABEL: add_inline_imm_2_f16:
1366; SI:       ; %bb.0:
1367; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1368; SI-NEXT:    s_mov_b32 s3, 0xf000
1369; SI-NEXT:    s_mov_b32 s2, -1
1370; SI-NEXT:    s_waitcnt lgkmcnt(0)
1371; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1372; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1373; SI-NEXT:    v_add_f32_e32 v0, 0x34000000, v0
1374; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1375; SI-NEXT:    s_waitcnt lgkmcnt(0)
1376; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1377; SI-NEXT:    s_endpgm
1378  %y = fadd half %x, 0xH0002
1379  store half %y, ptr addrspace(1) %out
1380  ret void
1381}
1382
; Adds 0xH0010 to the kernel argument %x and stores the sum to %out. 0xH0010
; is the half whose 16-bit pattern equals the integer 16, not the value 16.0.
; On the GFX10, GFX11 and VI runs the checks show that constant printed as the
; operand 16 of v_add_f16_e64. On the tahiti run %x is converted to f32, the
; add uses the f32 literal 0x35800000, and the result is converted back to
; f16 before the store.
1383define amdgpu_kernel void @add_inline_imm_16_f16(ptr addrspace(1) %out, half %x) {
1384; GFX10-LABEL: add_inline_imm_16_f16:
1385; GFX10:       ; %bb.0:
1386; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1387; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1388; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1389; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1390; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1391; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1392; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1393; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1394; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1395;
1396; GFX11-LABEL: add_inline_imm_16_f16:
1397; GFX11:       ; %bb.0:
1398; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1399; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1400; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1401; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1402; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1403; GFX11-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1404; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1405; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1406; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1407;
1408; VI-LABEL: add_inline_imm_16_f16:
1409; VI:       ; %bb.0:
1410; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1411; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1412; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1413; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1414; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1415; VI-NEXT:    v_add_f16_e64 v0, s4, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x20,0x01,0x00]
1416; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1417; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1418;
1419; SI-LABEL: add_inline_imm_16_f16:
1420; SI:       ; %bb.0:
1421; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1422; SI-NEXT:    s_mov_b32 s3, 0xf000
1423; SI-NEXT:    s_mov_b32 s2, -1
1424; SI-NEXT:    s_waitcnt lgkmcnt(0)
1425; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1426; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1427; SI-NEXT:    v_add_f32_e32 v0, 0x35800000, v0
1428; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1429; SI-NEXT:    s_waitcnt lgkmcnt(0)
1430; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1431; SI-NEXT:    s_endpgm
1432  %y = fadd half %x, 0xH0010
1433  store half %y, ptr addrspace(1) %out
1434  ret void
1435}
1436
; Loads an i16 from %in, adds -1 as an integer, bitcasts the result to half
; and stores it to %out. Despite the _f16 suffix, the only half-typed
; operations are the bitcast and the store; no floating-point add is involved.
; Every run emits a 32-bit integer add with -1 as its first source operand:
; v_add_nc_u32_e32 on GFX10 and GFX11, v_add_u32_e32 on VI, and v_add_i32_e32
; on tahiti. Where encodings are shown, the add is 4 bytes long.
; NOTE(review): presumably an integer add is used because an fadd with the half
; bit pattern 0xFFFF (a NaN) would not make a useful test -- confirm.
1437define amdgpu_kernel void @add_inline_imm_neg_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1438; GFX10-LABEL: add_inline_imm_neg_1_f16:
1439; GFX10:       ; %bb.0:
1440; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1441; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1442; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1443; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1444; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1445; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1446; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1447; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1448; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1449; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1450; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1451; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1452; GFX10-NEXT:    v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a]
1453; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1454; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1455;
1456; GFX11-LABEL: add_inline_imm_neg_1_f16:
1457; GFX11:       ; %bb.0:
1458; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1459; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1460; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1461; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1462; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1463; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1464; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1465; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1466; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1467; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1468; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1469; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1470; GFX11-NEXT:    v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a]
1471; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1472; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1473;
1474; VI-LABEL: add_inline_imm_neg_1_f16:
1475; VI:       ; %bb.0:
1476; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1477; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1478; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1479; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1480; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1481; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1482; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1483; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1484; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1485; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1486; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1487; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1488; VI-NEXT:    v_add_u32_e32 v0, vcc, -1, v0 ; encoding: [0xc1,0x00,0x00,0x32]
1489; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1490; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1491;
1492; SI-LABEL: add_inline_imm_neg_1_f16:
1493; SI:       ; %bb.0:
1494; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1495; SI-NEXT:    s_mov_b32 s7, 0xf000
1496; SI-NEXT:    s_mov_b32 s6, -1
1497; SI-NEXT:    s_mov_b32 s10, s6
1498; SI-NEXT:    s_mov_b32 s11, s7
1499; SI-NEXT:    s_waitcnt lgkmcnt(0)
1500; SI-NEXT:    s_mov_b32 s8, s2
1501; SI-NEXT:    s_mov_b32 s9, s3
1502; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1503; SI-NEXT:    s_mov_b32 s4, s0
1504; SI-NEXT:    s_mov_b32 s5, s1
1505; SI-NEXT:    s_waitcnt vmcnt(0)
1506; SI-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
1507; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1508; SI-NEXT:    s_endpgm
1509  %x = load i16, ptr addrspace(1) %in
1510  %y = add i16 %x, -1
1511  %ybc = bitcast i16 %y to half
1512  store half %ybc, ptr addrspace(1) %out
1513  ret void
1514}
1515
; Loads an i16 from %in, adds -2 as an integer, bitcasts the result to half
; and stores it to %out. Despite the _f16 suffix, the only half-typed
; operations are the bitcast and the store; no floating-point add is involved.
; The checked output differs between runs. GFX10, GFX11 and VI emit the
; constant as the literal 0xfffe, giving the 32-bit integer add an 8-byte
; encoding whose trailing four bytes hold that value. The tahiti run uses -2
; directly as the first source operand of v_add_i32_e32.
1516define amdgpu_kernel void @add_inline_imm_neg_2_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1517; GFX10-LABEL: add_inline_imm_neg_2_f16:
1518; GFX10:       ; %bb.0:
1519; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1520; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1521; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1522; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1523; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1524; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1525; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1526; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1527; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1528; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1529; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1530; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1531; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00]
1532; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1533; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1534;
1535; GFX11-LABEL: add_inline_imm_neg_2_f16:
1536; GFX11:       ; %bb.0:
1537; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1538; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1539; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1540; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1541; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1542; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1543; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1544; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1545; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1546; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1547; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1548; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1549; GFX11-NEXT:    v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00]
1550; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1551; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1552;
1553; VI-LABEL: add_inline_imm_neg_2_f16:
1554; VI:       ; %bb.0:
1555; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1556; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1557; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1558; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1559; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1560; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1561; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1562; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1563; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1564; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1565; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1566; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1567; VI-NEXT:    v_add_u32_e32 v0, vcc, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x32,0xfe,0xff,0x00,0x00]
1568; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1569; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1570;
1571; SI-LABEL: add_inline_imm_neg_2_f16:
1572; SI:       ; %bb.0:
1573; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1574; SI-NEXT:    s_mov_b32 s7, 0xf000
1575; SI-NEXT:    s_mov_b32 s6, -1
1576; SI-NEXT:    s_mov_b32 s10, s6
1577; SI-NEXT:    s_mov_b32 s11, s7
1578; SI-NEXT:    s_waitcnt lgkmcnt(0)
1579; SI-NEXT:    s_mov_b32 s8, s2
1580; SI-NEXT:    s_mov_b32 s9, s3
1581; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1582; SI-NEXT:    s_mov_b32 s4, s0
1583; SI-NEXT:    s_mov_b32 s5, s1
1584; SI-NEXT:    s_waitcnt vmcnt(0)
1585; SI-NEXT:    v_add_i32_e32 v0, vcc, -2, v0
1586; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1587; SI-NEXT:    s_endpgm
1588  %x = load i16, ptr addrspace(1) %in
1589  %y = add i16 %x, -2
1590  %ybc = bitcast i16 %y to half
1591  store half %ybc, ptr addrspace(1) %out
1592  ret void
1593}
1594
; Loads an i16 from %in, adds the integer constant -16, bitcasts the sum to
; half and stores it to %out. Despite the _f16 suffix in the name, the add
; itself is an integer operation. The GFX10/GFX11/VI checks expect the
; constant as the 32-bit literal 0xfff0, while the SI checks expect the
; operand to be printed as -16.
1595define amdgpu_kernel void @add_inline_imm_neg_16_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1596; GFX10-LABEL: add_inline_imm_neg_16_f16:
1597; GFX10:       ; %bb.0:
1598; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1599; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1600; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1601; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1602; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1603; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1604; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1605; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1606; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1607; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1608; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1609; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1610; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00]
1611; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1612; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1613;
1614; GFX11-LABEL: add_inline_imm_neg_16_f16:
1615; GFX11:       ; %bb.0:
1616; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1617; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1618; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1619; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1620; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1621; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1622; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1623; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1624; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1625; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1626; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1627; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1628; GFX11-NEXT:    v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00]
1629; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1630; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1631;
1632; VI-LABEL: add_inline_imm_neg_16_f16:
1633; VI:       ; %bb.0:
1634; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1635; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1636; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1637; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1638; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1639; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1640; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1641; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1642; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1643; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1644; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1645; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1646; VI-NEXT:    v_add_u32_e32 v0, vcc, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x32,0xf0,0xff,0x00,0x00]
1647; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1648; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1649;
1650; SI-LABEL: add_inline_imm_neg_16_f16:
1651; SI:       ; %bb.0:
1652; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1653; SI-NEXT:    s_mov_b32 s7, 0xf000
1654; SI-NEXT:    s_mov_b32 s6, -1
1655; SI-NEXT:    s_mov_b32 s10, s6
1656; SI-NEXT:    s_mov_b32 s11, s7
1657; SI-NEXT:    s_waitcnt lgkmcnt(0)
1658; SI-NEXT:    s_mov_b32 s8, s2
1659; SI-NEXT:    s_mov_b32 s9, s3
1660; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1661; SI-NEXT:    s_mov_b32 s4, s0
1662; SI-NEXT:    s_mov_b32 s5, s1
1663; SI-NEXT:    s_waitcnt vmcnt(0)
1664; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
1665; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1666; SI-NEXT:    s_endpgm
1667  %x = load i16, ptr addrspace(1) %in
1668  %y = add i16 %x, -16
1669  %ybc = bitcast i16 %y to half
1670  store half %ybc, ptr addrspace(1) %out
1671  ret void
1672}
1673
; fadd of the half kernel argument %x with 0xH003F (the half whose bit pattern
; is the integer 63); the sum is stored to %out. The GFX10/GFX11/VI checks
; expect v_add_f16_e64 with the constant printed as the operand 63 and no
; trailing literal dword in the encoding. The SI checks expect the add to be
; performed in f32 with the literal 0x367c0000, between f16->f32 and f32->f16
; conversions.
1674define amdgpu_kernel void @add_inline_imm_63_f16(ptr addrspace(1) %out, half %x) {
1675; GFX10-LABEL: add_inline_imm_63_f16:
1676; GFX10:       ; %bb.0:
1677; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1678; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1679; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1680; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1681; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1682; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1683; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1684; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1685; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1686;
1687; GFX11-LABEL: add_inline_imm_63_f16:
1688; GFX11:       ; %bb.0:
1689; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1690; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1691; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1692; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1693; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1694; GFX11-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1695; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1696; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1697; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1698;
1699; VI-LABEL: add_inline_imm_63_f16:
1700; VI:       ; %bb.0:
1701; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1702; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1703; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1704; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1705; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1706; VI-NEXT:    v_add_f16_e64 v0, s4, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x7e,0x01,0x00]
1707; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1708; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1709;
1710; SI-LABEL: add_inline_imm_63_f16:
1711; SI:       ; %bb.0:
1712; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1713; SI-NEXT:    s_mov_b32 s3, 0xf000
1714; SI-NEXT:    s_mov_b32 s2, -1
1715; SI-NEXT:    s_waitcnt lgkmcnt(0)
1716; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1717; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1718; SI-NEXT:    v_add_f32_e32 v0, 0x367c0000, v0
1719; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1720; SI-NEXT:    s_waitcnt lgkmcnt(0)
1721; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1722; SI-NEXT:    s_endpgm
1723  %y = fadd half %x, 0xH003F
1724  store half %y, ptr addrspace(1) %out
1725  ret void
1726}
1727
; fadd of the half kernel argument %x with 0xH0040 (the half whose bit pattern
; is the integer 64); the sum is stored to %out. The GFX10/GFX11/VI checks
; expect v_add_f16_e64 with the constant printed as the operand 64 and no
; trailing literal dword in the encoding. The SI checks expect the add to be
; performed in f32 with the literal 0x36800000, between f16->f32 and f32->f16
; conversions.
1728define amdgpu_kernel void @add_inline_imm_64_f16(ptr addrspace(1) %out, half %x) {
1729; GFX10-LABEL: add_inline_imm_64_f16:
1730; GFX10:       ; %bb.0:
1731; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1732; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1733; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1734; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1735; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1736; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1737; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1738; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1739; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1740;
1741; GFX11-LABEL: add_inline_imm_64_f16:
1742; GFX11:       ; %bb.0:
1743; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1744; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1745; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1746; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1747; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1748; GFX11-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1749; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1750; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1751; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1752;
1753; VI-LABEL: add_inline_imm_64_f16:
1754; VI:       ; %bb.0:
1755; VI-NEXT:    s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1756; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1757; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1758; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1759; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1760; VI-NEXT:    v_add_f16_e64 v0, s4, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x80,0x01,0x00]
1761; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1762; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1763;
1764; SI-LABEL: add_inline_imm_64_f16:
1765; SI:       ; %bb.0:
1766; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
1767; SI-NEXT:    s_mov_b32 s3, 0xf000
1768; SI-NEXT:    s_mov_b32 s2, -1
1769; SI-NEXT:    s_waitcnt lgkmcnt(0)
1770; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
1771; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1772; SI-NEXT:    v_add_f32_e32 v0, 0x36800000, v0
1773; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1774; SI-NEXT:    s_waitcnt lgkmcnt(0)
1775; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1776; SI-NEXT:    s_endpgm
1777  %y = fadd half %x, 0xH0040
1778  store half %y, ptr addrspace(1) %out
1779  ret void
1780}
1781
1782; This needs to be emitted as a literal constant since the 16-bit
1783; float values do not work for 16-bit integer operations.
;
; Multiplies the i16 argument %x by the bit pattern of half 0.5 and stores the
; product to %out. All four check prefixes expect the constant to appear as
; the hexadecimal operand 0x3800; the GFX10/GFX11/VI encodings carry it as a
; trailing literal dword.
1784define void @mul_inline_imm_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1785; GFX10-LABEL: mul_inline_imm_0.5_i16:
1786; GFX10:       ; %bb.0:
1787; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1788; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1789; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1790; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1791;
1792; GFX11-LABEL: mul_inline_imm_0.5_i16:
1793; GFX11:       ; %bb.0:
1794; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1795; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1796; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1797; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1798;
1799; VI-LABEL: mul_inline_imm_0.5_i16:
1800; VI:       ; %bb.0:
1801; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1802; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
1803; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1804; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1805; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1806;
1807; SI-LABEL: mul_inline_imm_0.5_i16:
1808; SI:       ; %bb.0:
1809; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1810; SI-NEXT:    s_mov_b32 s6, 0
1811; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1812; SI-NEXT:    s_mov_b32 s7, 0xf000
1813; SI-NEXT:    s_mov_b32 s4, s6
1814; SI-NEXT:    s_mov_b32 s5, s6
1815; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
1816; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1817; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1818; SI-NEXT:    s_setpc_b64 s[30:31]
1819  %y = mul i16 %x, bitcast (half 0.5 to i16)
1820  store i16 %y, ptr addrspace(1) %out
1821  ret void
1822}
1823
; Multiplies the i16 argument %x by the bit pattern of half -0.5 and stores
; the product to %out. All four check prefixes expect the constant printed as
; 0xb800; in the GFX10/GFX11/VI encodings the trailing literal dword has its
; upper 16 bits set (bytes 0x00,0xb8,0xff,0xff).
1824define void @mul_inline_imm_neg_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1825; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
1826; GFX10:       ; %bb.0:
1827; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1828; GFX10-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1829; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1830; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1831;
1832; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
1833; GFX11:       ; %bb.0:
1834; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1835; GFX11-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1836; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1837; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1838;
1839; VI-LABEL: mul_inline_imm_neg_0.5_i16:
1840; VI:       ; %bb.0:
1841; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1842; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
1843; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1844; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1845; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1846;
1847; SI-LABEL: mul_inline_imm_neg_0.5_i16:
1848; SI:       ; %bb.0:
1849; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850; SI-NEXT:    s_mov_b32 s6, 0
1851; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1852; SI-NEXT:    s_mov_b32 s7, 0xf000
1853; SI-NEXT:    s_mov_b32 s4, s6
1854; SI-NEXT:    s_mov_b32 s5, s6
1855; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
1856; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1857; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1858; SI-NEXT:    s_setpc_b64 s[30:31]
1859  %y = mul i16 %x, bitcast (half -0.5 to i16)
1860  store i16 %y, ptr addrspace(1) %out
1861  ret void
1862}
1863
; Multiplies the i16 argument %x by the bit pattern of half 1.0 and stores the
; product to %out. All four check prefixes expect the constant to appear as
; the hexadecimal operand 0x3c00; the GFX10/GFX11/VI encodings carry it as a
; trailing literal dword.
1864define void @mul_inline_imm_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1865; GFX10-LABEL: mul_inline_imm_1.0_i16:
1866; GFX10:       ; %bb.0:
1867; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1868; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1869; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1870; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1871;
1872; GFX11-LABEL: mul_inline_imm_1.0_i16:
1873; GFX11:       ; %bb.0:
1874; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1875; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1876; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1877; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1878;
1879; VI-LABEL: mul_inline_imm_1.0_i16:
1880; VI:       ; %bb.0:
1881; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1882; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
1883; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1884; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1885; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1886;
1887; SI-LABEL: mul_inline_imm_1.0_i16:
1888; SI:       ; %bb.0:
1889; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1890; SI-NEXT:    s_mov_b32 s6, 0
1891; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1892; SI-NEXT:    s_mov_b32 s7, 0xf000
1893; SI-NEXT:    s_mov_b32 s4, s6
1894; SI-NEXT:    s_mov_b32 s5, s6
1895; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
1896; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1897; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1898; SI-NEXT:    s_setpc_b64 s[30:31]
1899  %y = mul i16 %x, bitcast (half 1.0 to i16)
1900  store i16 %y, ptr addrspace(1) %out
1901  ret void
1902}
1903
; Multiplies the i16 argument %x by the bit pattern of half -1.0 and stores
; the product to %out. All four check prefixes expect the constant printed as
; 0xbc00; in the GFX10/GFX11/VI encodings the trailing literal dword has its
; upper 16 bits set (bytes 0x00,0xbc,0xff,0xff).
1904define void @mul_inline_imm_neg_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1905; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
1906; GFX10:       ; %bb.0:
1907; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1908; GFX10-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1909; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1910; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1911;
1912; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
1913; GFX11:       ; %bb.0:
1914; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1915; GFX11-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1916; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1917; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1918;
1919; VI-LABEL: mul_inline_imm_neg_1.0_i16:
1920; VI:       ; %bb.0:
1921; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1922; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
1923; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1924; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1925; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1926;
1927; SI-LABEL: mul_inline_imm_neg_1.0_i16:
1928; SI:       ; %bb.0:
1929; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930; SI-NEXT:    s_mov_b32 s6, 0
1931; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1932; SI-NEXT:    s_mov_b32 s7, 0xf000
1933; SI-NEXT:    s_mov_b32 s4, s6
1934; SI-NEXT:    s_mov_b32 s5, s6
1935; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
1936; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1937; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1938; SI-NEXT:    s_setpc_b64 s[30:31]
1939  %y = mul i16 %x, bitcast (half -1.0 to i16)
1940  store i16 %y, ptr addrspace(1) %out
1941  ret void
1942}
1943
; Shifts the bit pattern of half 2.0 left by the i16 argument %x (the constant
; is the value being shifted, %x is the shift amount) and stores the result to
; %out. The GFX10/GFX11 checks expect v_lshlrev_b16 with the literal 0x4000;
; the VI checks expect the constant to be moved into s4 with s_movk_i32 first;
; the SI checks expect v_lshl_b32 with the operand 0x4000.
1944define void @shl_inline_imm_2.0_i16(ptr addrspace(1) %out, i16 %x) {
1945; GFX10-LABEL: shl_inline_imm_2.0_i16:
1946; GFX10:       ; %bb.0:
1947; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1948; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1949; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1950; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1951;
1952; GFX11-LABEL: shl_inline_imm_2.0_i16:
1953; GFX11:       ; %bb.0:
1954; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1955; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1956; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1957; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1958;
1959; VI-LABEL: shl_inline_imm_2.0_i16:
1960; VI:       ; %bb.0:
1961; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1962; VI-NEXT:    s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
1963; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
1964; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1965; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1966; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1967;
1968; SI-LABEL: shl_inline_imm_2.0_i16:
1969; SI:       ; %bb.0:
1970; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1971; SI-NEXT:    s_mov_b32 s6, 0
1972; SI-NEXT:    s_mov_b32 s7, 0xf000
1973; SI-NEXT:    s_mov_b32 s4, s6
1974; SI-NEXT:    s_mov_b32 s5, s6
1975; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
1976; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1977; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1978; SI-NEXT:    s_setpc_b64 s[30:31]
1979  %y = shl i16 bitcast (half 2.0 to i16), %x
1980  store i16 %y, ptr addrspace(1) %out
1981  ret void
1982}
1983
; Shifts the bit pattern of half -2.0 left by the i16 argument %x (the
; constant is the value being shifted, %x is the shift amount) and stores the
; result to %out. The GFX10/GFX11 checks expect v_lshlrev_b16 with the literal
; printed as 0xc000; the VI checks expect the constant to be moved into s4
; with s_movk_i32 first; the SI checks expect v_lshl_b32 with the operand
; 0xffffc000.
1984define void @shl_inline_imm_neg_2.0_i16(ptr addrspace(1) %out, i16 %x) {
1985; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
1986; GFX10:       ; %bb.0:
1987; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1988; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
1989; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1990; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1991;
1992; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
1993; GFX11:       ; %bb.0:
1994; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1995; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
1996; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1997; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1998;
1999; VI-LABEL: shl_inline_imm_neg_2.0_i16:
2000; VI:       ; %bb.0:
2001; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2002; VI-NEXT:    s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
2003; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2004; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2005; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2006; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2007;
2008; SI-LABEL: shl_inline_imm_neg_2.0_i16:
2009; SI:       ; %bb.0:
2010; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2011; SI-NEXT:    s_mov_b32 s6, 0
2012; SI-NEXT:    s_mov_b32 s7, 0xf000
2013; SI-NEXT:    s_mov_b32 s4, s6
2014; SI-NEXT:    s_mov_b32 s5, s6
2015; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
2016; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2017; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2018; SI-NEXT:    s_setpc_b64 s[30:31]
2019  %y = shl i16 bitcast (half -2.0 to i16), %x
2020  store i16 %y, ptr addrspace(1) %out
2021  ret void
2022}
2023
; Multiplies the i16 argument %x by the bit pattern of half 4.0 and stores the
; product to %out. All four check prefixes expect the constant to appear as
; the hexadecimal operand 0x4400; the GFX10/GFX11/VI encodings carry it as a
; trailing literal dword.
2024define void @mul_inline_imm_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2025; GFX10-LABEL: mul_inline_imm_4.0_i16:
2026; GFX10:       ; %bb.0:
2027; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2028; GFX10-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2029; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2030; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2031;
2032; GFX11-LABEL: mul_inline_imm_4.0_i16:
2033; GFX11:       ; %bb.0:
2034; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2035; GFX11-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2036; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2037; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2038;
2039; VI-LABEL: mul_inline_imm_4.0_i16:
2040; VI:       ; %bb.0:
2041; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2042; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
2043; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2044; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2045; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2046;
2047; SI-LABEL: mul_inline_imm_4.0_i16:
2048; SI:       ; %bb.0:
2049; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050; SI-NEXT:    s_mov_b32 s6, 0
2051; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2052; SI-NEXT:    s_mov_b32 s7, 0xf000
2053; SI-NEXT:    s_mov_b32 s4, s6
2054; SI-NEXT:    s_mov_b32 s5, s6
2055; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
2056; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2057; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2058; SI-NEXT:    s_setpc_b64 s[30:31]
2059  %y = mul i16 %x, bitcast (half 4.0 to i16)
2060  store i16 %y, ptr addrspace(1) %out
2061  ret void
2062}
2063
; Multiplies the i16 argument %x by the bit pattern of half -4.0 and stores
; the product to %out. All four check prefixes expect the constant printed as
; 0xc400; in the GFX10/GFX11/VI encodings the trailing literal dword has its
; upper 16 bits set (bytes 0x00,0xc4,0xff,0xff).
2064define void @mul_inline_imm_neg_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2065; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
2066; GFX10:       ; %bb.0:
2067; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2068; GFX10-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2069; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2070; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2071;
2072; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
2073; GFX11:       ; %bb.0:
2074; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2075; GFX11-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2076; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2077; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2078;
2079; VI-LABEL: mul_inline_imm_neg_4.0_i16:
2080; VI:       ; %bb.0:
2081; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2082; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
2083; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2084; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2085; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2086;
2087; SI-LABEL: mul_inline_imm_neg_4.0_i16:
2088; SI:       ; %bb.0:
2089; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090; SI-NEXT:    s_mov_b32 s6, 0
2091; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2092; SI-NEXT:    s_mov_b32 s7, 0xf000
2093; SI-NEXT:    s_mov_b32 s4, s6
2094; SI-NEXT:    s_mov_b32 s5, s6
2095; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
2096; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2097; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2098; SI-NEXT:    s_setpc_b64 s[30:31]
2099  %y = mul i16 %x, bitcast (half -4.0 to i16)
2100  store i16 %y, ptr addrspace(1) %out
2101  ret void
2102}
2103
; Multiplies the i16 argument %x by the bit pattern of the half constant
; 0xH3118 (named inv2pi by this test) and stores the product to %out. All four
; check prefixes expect the constant to appear as the hexadecimal operand
; 0x3118; the GFX10/GFX11/VI encodings carry it as a trailing literal dword.
2104define void @mul_inline_imm_inv2pi_i16(ptr addrspace(1) %out, i16 %x) {
2105; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
2106; GFX10:       ; %bb.0:
2107; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2108; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2109; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2110; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2111;
2112; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
2113; GFX11:       ; %bb.0:
2114; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2115; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2116; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2117; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2118;
2119; VI-LABEL: mul_inline_imm_inv2pi_i16:
2120; VI:       ; %bb.0:
2121; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2122; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
2123; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2124; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2125; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2126;
2127; SI-LABEL: mul_inline_imm_inv2pi_i16:
2128; SI:       ; %bb.0:
2129; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2130; SI-NEXT:    s_mov_b32 s6, 0
2131; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2132; SI-NEXT:    s_mov_b32 s7, 0xf000
2133; SI-NEXT:    s_mov_b32 s4, s6
2134; SI-NEXT:    s_mov_b32 s5, s6
2135; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
2136; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2137; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2138; SI-NEXT:    s_setpc_b64 s[30:31]
2139  %y = mul i16 %x, bitcast (half 0xH3118 to i16)
2140  store i16 %y, ptr addrspace(1) %out
2141  ret void
2142}
2143