xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll (revision 2d6d723a85c2d007b0359c206d66cd2e5a9f00e1)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
5
; Basic f32 interpolation test: two parameter loads feeding p10/p2 pairs, with
; %i, %j and %m0 passed as inreg arguments.
;
; The expected code copies %m0 into m0, saves exec_lo and applies s_wqm_b32
; around the two parameter loads, restores exec_lo, and moves %i/%j from
; s0/s1 into VGPRs before the v_interp instructions.
;
; Per the expected output, lds.param.load(i32 1, i32 0, ...) selects attr0.y
; and lds.param.load(i32 0, i32 1, ...) selects attr1.x, i.e. the first
; operand is the channel and the second is the attribute index.
6define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
7; GFX11-LABEL: v_interp_f32:
8; GFX11:       ; %bb.0: ; %main_body
9; GFX11-NEXT:    s_mov_b32 s3, exec_lo
10; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
11; GFX11-NEXT:    s_mov_b32 m0, s2
12; GFX11-NEXT:    lds_param_load v0, attr0.y wait_vdst:15
13; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
14; GFX11-NEXT:    s_mov_b32 exec_lo, s3
15; GFX11-NEXT:    v_mov_b32_e32 v2, s0
16; GFX11-NEXT:    v_mov_b32_e32 v4, s1
17; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
18; GFX11-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
19; GFX11-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
20; GFX11-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
21; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
22; GFX11-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
23; GFX11-NEXT:    exp mrt0 v3, v2, v5, v4 done
24; GFX11-NEXT:    s_endpgm
25;
26; GFX12-LABEL: v_interp_f32:
27; GFX12:       ; %bb.0: ; %main_body
28; GFX12-NEXT:    s_mov_b32 s3, exec_lo
29; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
30; GFX12-NEXT:    s_mov_b32 m0, s2
31; GFX12-NEXT:    ds_param_load v0, attr0.y wait_va_vdst:15 wait_vm_vsrc:1
32; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
33; GFX12-NEXT:    s_mov_b32 exec_lo, s3
34; GFX12-NEXT:    v_mov_b32_e32 v2, s0
35; GFX12-NEXT:    v_mov_b32_e32 v4, s1
36; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
37; GFX12-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
38; GFX12-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
39; GFX12-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
40; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
41; GFX12-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
42; GFX12-NEXT:    export mrt0 v3, v2, v5, v4 done
43; GFX12-NEXT:    s_endpgm
44main_body:
45  %p0 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %m0)
46  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
47  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
48  %p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
49  %p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
  ; Note the naming: this p2 accumulates onto %p1_0 (the result of the first
  ; p2 above), not onto %p0_1. The expected output reflects this: the last
  ; v_interp_p2_f32 reads v5, the destination of the first p2.
50  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
  ; Both p10 intermediates and both p2 results are exported, so all four
  ; values stay live until the export.
51  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
52  ret void
53}
54
; Four independent p10/p2 interpolation chains (channel x of attributes 0..3)
; sharing the same inreg %i and %j; only the four p2 results are exported.
;
; The expected code issues all four parameter loads back to back, then the
; four v_interp_p10_f32 with wait_exp counting down 3, 2, 1, 0, followed by
; the four v_interp_p2_f32 (all wait_exp:7). %i and %j are moved from s0/s1
; with a single v_dual_mov_b32.
55define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
56; GFX11-LABEL: v_interp_f32_many:
57; GFX11:       ; %bb.0: ; %main_body
58; GFX11-NEXT:    s_mov_b32 s3, exec_lo
59; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
60; GFX11-NEXT:    s_mov_b32 m0, s2
61; GFX11-NEXT:    lds_param_load v0, attr0.x wait_vdst:15
62; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
63; GFX11-NEXT:    lds_param_load v2, attr2.x wait_vdst:15
64; GFX11-NEXT:    lds_param_load v3, attr3.x wait_vdst:15
65; GFX11-NEXT:    s_mov_b32 exec_lo, s3
66; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
67; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
68; GFX11-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
69; GFX11-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
70; GFX11-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
71; GFX11-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
72; GFX11-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
73; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
74; GFX11-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
75; GFX11-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
76; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
77; GFX11-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
78; GFX11-NEXT:    exp mrt0 v6, v7, v8, v4 done
79; GFX11-NEXT:    s_endpgm
80;
81; GFX12-LABEL: v_interp_f32_many:
82; GFX12:       ; %bb.0: ; %main_body
83; GFX12-NEXT:    s_mov_b32 s3, exec_lo
84; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
85; GFX12-NEXT:    s_mov_b32 m0, s2
86; GFX12-NEXT:    ds_param_load v0, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
87; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
88; GFX12-NEXT:    ds_param_load v2, attr2.x wait_va_vdst:15 wait_vm_vsrc:1
89; GFX12-NEXT:    ds_param_load v3, attr3.x wait_va_vdst:15 wait_vm_vsrc:1
90; GFX12-NEXT:    s_mov_b32 exec_lo, s3
91; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
92; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
93; GFX12-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
94; GFX12-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
95; GFX12-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
96; GFX12-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
97; GFX12-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
98; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
99; GFX12-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
100; GFX12-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
101; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
102; GFX12-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
103; GFX12-NEXT:    export mrt0 v6, v7, v8, v4 done
104; GFX12-NEXT:    s_endpgm
105main_body:
  ; Channel operand is 0 for every load; only the attribute index varies.
106  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
107  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
108  %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
109  %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
  ; Each attribute gets its own p10 -> p2 pair; the pairs do not depend on
  ; one another.
110  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
111  %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
112  %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
113  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
114  %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
115  %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
116  %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
117  %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
118  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
119  ret void
120}
121
; Same four-attribute interpolation as v_interp_f32_many, but %i and %j are
; loaded from global memory (floats at index 1 and 2 from %ptr) instead of
; being passed as inreg arguments.
;
; The expected code fetches both values with one global_load_b64 at offset:4,
; issues the parameter loads while that load is outstanding, and places a wait
; on the memory load (s_waitcnt vmcnt(0) on gfx1100, s_wait_loadcnt 0x0 on
; gfx1200) immediately before the first v_interp_p10_f32. The loaded values
; are used directly from v0/v1, so no v_mov is expected here.
122define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
123; GFX11-LABEL: v_interp_f32_many_vm:
124; GFX11:       ; %bb.0: ; %main_body
125; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4
126; GFX11-NEXT:    s_mov_b32 m0, s0
127; GFX11-NEXT:    s_mov_b32 s0, exec_lo
128; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
129; GFX11-NEXT:    lds_param_load v2, attr0.x wait_vdst:15
130; GFX11-NEXT:    lds_param_load v3, attr1.x wait_vdst:15
131; GFX11-NEXT:    lds_param_load v4, attr2.x wait_vdst:15
132; GFX11-NEXT:    lds_param_load v5, attr3.x wait_vdst:15
133; GFX11-NEXT:    s_mov_b32 exec_lo, s0
134; GFX11-NEXT:    s_waitcnt vmcnt(0)
135; GFX11-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
136; GFX11-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
137; GFX11-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
138; GFX11-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
139; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
140; GFX11-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
141; GFX11-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
142; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
143; GFX11-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
144; GFX11-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
145; GFX11-NEXT:    exp mrt0 v6, v7, v8, v0 done
146; GFX11-NEXT:    s_endpgm
147;
148; GFX12-LABEL: v_interp_f32_many_vm:
149; GFX12:       ; %bb.0: ; %main_body
150; GFX12-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4
151; GFX12-NEXT:    s_mov_b32 m0, s0
152; GFX12-NEXT:    s_mov_b32 s0, exec_lo
153; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
154; GFX12-NEXT:    ds_param_load v2, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
155; GFX12-NEXT:    ds_param_load v3, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
156; GFX12-NEXT:    ds_param_load v4, attr2.x wait_va_vdst:15 wait_vm_vsrc:1
157; GFX12-NEXT:    ds_param_load v5, attr3.x wait_va_vdst:15 wait_vm_vsrc:1
158; GFX12-NEXT:    s_mov_b32 exec_lo, s0
159; GFX12-NEXT:    s_wait_loadcnt 0x0
160; GFX12-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
161; GFX12-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
162; GFX12-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
163; GFX12-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
164; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
165; GFX12-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
166; GFX12-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
167; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
168; GFX12-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
169; GFX12-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
170; GFX12-NEXT:    export mrt0 v6, v7, v8, v0 done
171; GFX12-NEXT:    s_endpgm
172main_body:
  ; %i and %j are adjacent floats (element 1 and element 2 of %ptr), which is
  ; why the expected output contains a single 64-bit load at byte offset 4.
173  %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1
174  %i = load float, ptr addrspace(1) %i.ptr, align 4
175  %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2
176  %j = load float, ptr addrspace(1) %j.ptr, align 4
177  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
178  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
179  %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
180  %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
181  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
182  %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
183  %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
184  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
185  %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
186  %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
187  %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
188  %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
189  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
190  ret void
191}
192
; f16 interpolation test: one parameter load is interpolated twice, once with
; the trailing i1 operand set to 0 and once set to 1, and the two half
; results are added and returned.
;
; The expected output differs by 16-bit register mode:
;  - with +real-true16 the i1 0 / i1 1 calls read v1.l / v1.h and the p2
;    results are written to v0.l / v0.h;
;  - with -real-true16 and on gfx1200 the i1 1 calls instead carry op_sel
;    ([1,0,1,0] on p10, [1,0,0,0] on p2) on full 32-bit registers.
193define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
194; GFX11-TRUE16-LABEL: v_interp_f16:
195; GFX11-TRUE16:       ; %bb.0: ; %main_body
196; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo
197; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
198; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2
199; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
200; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3
201; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
202; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1
203; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
204; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
205; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
206; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
207; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
208; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
209; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
210; GFX11-TRUE16-NEXT:    ; return to shader part epilog
211;
212; GFX11-FAKE16-LABEL: v_interp_f16:
213; GFX11-FAKE16:       ; %bb.0: ; %main_body
214; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo
215; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
216; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2
217; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
218; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3
219; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
220; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
221; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
222; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
223; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
224; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
225; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
226; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
227; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0
228; GFX11-FAKE16-NEXT:    ; return to shader part epilog
229;
230; GFX12-LABEL: v_interp_f16:
231; GFX12:       ; %bb.0: ; %main_body
232; GFX12-NEXT:    s_mov_b32 s3, exec_lo
233; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
234; GFX12-NEXT:    s_mov_b32 m0, s2
235; GFX12-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
236; GFX12-NEXT:    s_mov_b32 exec_lo, s3
237; GFX12-NEXT:    v_mov_b32_e32 v0, s0
238; GFX12-NEXT:    v_mov_b32_e32 v2, s1
239; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
240; GFX12-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
241; GFX12-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
242; GFX12-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
243; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
244; GFX12-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
245; GFX12-NEXT:    v_add_f16_e32 v0, v3, v0
246; GFX12-NEXT:    ; return to shader part epilog
247main_body:
248  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  ; %l_* use i1 0 and %h_* use i1 1; both chains read the same loaded %p0.
  ; The p10 step yields a float intermediate, the p2 step yields the half.
249  %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 0)
250  %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %l_p0, i1 0)
251  %h_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 1)
252  %h_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %h_p0, i1 1)
253  %res = fadd half %l_p1, %h_p1
254  ret half %res
255}
256
; Same structure as v_interp_f16, but calling the rtz intrinsics
; (llvm.amdgcn.interp.p10.rtz.f16 / llvm.amdgcn.interp.p2.rtz.f16; note these
; names have no "inreg" component).
;
; The expected output matches v_interp_f16 except that the instructions are
; v_interp_p10_rtz_f16_f32 / v_interp_p2_rtz_f16_f32; the true16 (.l/.h
; operands) versus op_sel split between run lines is the same.
257define amdgpu_ps half @v_interp_rtz_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
258; GFX11-TRUE16-LABEL: v_interp_rtz_f16:
259; GFX11-TRUE16:       ; %bb.0: ; %main_body
260; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo
261; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
262; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2
263; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
264; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3
265; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
266; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1
267; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
268; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
269; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
270; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
271; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
272; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
273; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
274; GFX11-TRUE16-NEXT:    ; return to shader part epilog
275;
276; GFX11-FAKE16-LABEL: v_interp_rtz_f16:
277; GFX11-FAKE16:       ; %bb.0: ; %main_body
278; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo
279; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
280; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2
281; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
282; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3
283; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
284; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
285; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
286; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
287; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
288; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
289; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
290; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
291; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0
292; GFX11-FAKE16-NEXT:    ; return to shader part epilog
293;
294; GFX12-LABEL: v_interp_rtz_f16:
295; GFX12:       ; %bb.0: ; %main_body
296; GFX12-NEXT:    s_mov_b32 s3, exec_lo
297; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
298; GFX12-NEXT:    s_mov_b32 m0, s2
299; GFX12-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
300; GFX12-NEXT:    s_mov_b32 exec_lo, s3
301; GFX12-NEXT:    v_mov_b32_e32 v0, s0
302; GFX12-NEXT:    v_mov_b32_e32 v2, s1
303; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
304; GFX12-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
305; GFX12-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
306; GFX12-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
307; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
308; GFX12-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
309; GFX12-NEXT:    v_add_f16_e32 v0, v3, v0
310; GFX12-NEXT:    ; return to shader part epilog
311main_body:
312  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  ; %l_* use i1 0 and %h_* use i1 1, mirroring v_interp_f16.
313  %l_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 0)
314  %l_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %l_p0, i1 0)
315  %h_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 1)
316  %h_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %h_p0, i1 1)
317  %res = fadd half %l_p1, %h_p1
318  ret half %res
319}
320
; f16 interpolation with constant 0.0 in place of loaded parameters: there is
; no lds.param.load and no %m0 argument, so the expected output has no m0 or
; exec setup and simply materialises 0 in a VGPR.
;
; The p10 and p2 calls are independent here (p2 takes 0.0, not the p10
; result, as its third operand). The float p10 result is truncated to half and
; added to the half p2 result, hence the v_cvt_f16_f32 and v_add_f16 in the
; expected code. With +real-true16 the zero is materialised twice: as a 16-bit
; v0.l for the half-register operands and as a 32-bit v2 for the p2 third
; operand.
321define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
322; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
323; GFX11-TRUE16:       ; %bb.0: ; %main_body
324; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0
325; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
326; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v3, s1
327; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
328; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
329; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
330; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
331; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
332; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.h, v0.l
333; GFX11-TRUE16-NEXT:    ; return to shader part epilog
334;
335; GFX11-FAKE16-LABEL: v_interp_f16_imm_params:
336; GFX11-FAKE16:       ; %bb.0: ; %main_body
337; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
338; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
339; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
340; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
341; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
342; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
343; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
344; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v1, v0
345; GFX11-FAKE16-NEXT:    ; return to shader part epilog
346;
347; GFX12-LABEL: v_interp_f16_imm_params:
348; GFX12:       ; %bb.0: ; %main_body
349; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
350; GFX12-NEXT:    v_mov_b32_e32 v2, s1
351; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
352; GFX12-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
353; GFX12-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
354; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
355; GFX12-NEXT:    v_cvt_f16_f32_e32 v1, v1
356; GFX12-NEXT:    v_add_f16_e32 v0, v1, v0
357; GFX12-NEXT:    ; return to shader part epilog
358main_body:
359  %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0)
360  %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float 0.0, float %j, float 0.0, i1 0)
  ; The p10 intrinsic returns float, so it is narrowed before the half add.
361  %h = fptrunc float %l_p0 to half
362  %res = fadd half %h, %l_p1
363  ret half %res
364}
365
; Intrinsic declarations for the test functions in this file.
; The f16 p10 variants return float and the f16 p2 variants return half; the
; trailing i1 on the f16 variants is the low/high selector exercised by the
; f16 tests.
366declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
367declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
368declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
369declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
370declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
371declare float @llvm.amdgcn.interp.p10.rtz.f16(float, float, float, i1) #0
372declare half @llvm.amdgcn.interp.p2.rtz.f16(float, float, float, i1) #0
373declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; NOTE(review): llvm.amdgcn.exp.f16 is declared (with float operands) but is
; not called by any function visible in this file - confirm whether it is
; still needed.
374declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
375
; #0 is applied to the test functions and most intrinsics; #1 is used only by
; llvm.amdgcn.lds.param.load.
376attributes #0 = { nounwind }
377attributes #1 = { nounwind readnone }
378