xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
6
; Intrinsics exercised by this test. Operand roles of exp.row, as shown by the
; calls and CHECK lines in this file:
;   1st i32      - export target (12 prints as pos0, 13 as pos1)
;   2nd i32      - selects which of the four sources are exported
;                  (0 -> none, 1 -> first only, 7 -> first three)
;   next four    - the source values (i32 or float flavour)
;   i1           - true adds the "done" modifier to the export
;   last i32     - row number; it ends up in m0 before the export
7declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32)
8declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)
9declare i32 @llvm.amdgcn.workitem.id.x()
10
; undef_i32: two i32 row exports whose four sources are all undef and whose
; second operand is 0. The expected output exports nothing ("off" in every
; slot), writes the constant row 0 to m0 once, and puts "done" only on the
; second export (the call passing i1 true).
11define amdgpu_kernel void @undef_i32() #0 {
12; GFX11-LABEL: undef_i32:
13; GFX11:       ; %bb.0:
14; GFX11-NEXT:    s_mov_b32 m0, 0
15; GFX11-NEXT:    exp pos0 off, off, off, off row_en
16; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
17; GFX11-NEXT:    s_endpgm
18;
19; GFX12-LABEL: undef_i32:
20; GFX12:       ; %bb.0:
21; GFX12-NEXT:    s_mov_b32 m0, 0
22; GFX12-NEXT:    export pos0 off, off, off, off row_en
23; GFX12-NEXT:    export pos1 off, off, off, off done row_en
24; GFX12-NEXT:    s_endpgm
  ; Target 12 -> pos0 (not done), target 13 -> pos1 (done); both use row 0.
25  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 false, i32 0)
26  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
27  ret void
28}
29
; undef_f32: float counterpart of undef_i32. All four float sources are undef
; and the second operand is 0, so the expected code is identical to the i32
; case: m0 set to 0, then two all-"off" exports with "done" on the second.
30define amdgpu_kernel void @undef_f32() #0 {
31; GFX11-LABEL: undef_f32:
32; GFX11:       ; %bb.0:
33; GFX11-NEXT:    s_mov_b32 m0, 0
34; GFX11-NEXT:    exp pos0 off, off, off, off row_en
35; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
36; GFX11-NEXT:    s_endpgm
37;
38; GFX12-LABEL: undef_f32:
39; GFX12:       ; %bb.0:
40; GFX12-NEXT:    s_mov_b32 m0, 0
41; GFX12-NEXT:    export pos0 off, off, off, off row_en
42; GFX12-NEXT:    export pos1 off, off, off, off done row_en
43; GFX12-NEXT:    s_endpgm
  ; Target 12 -> pos0 (not done), target 13 -> pos1 (done); both use row 0.
44  call void @llvm.amdgcn.exp.row.f32(i32 12, i32 0, float undef, float undef, float undef, float undef, i1 false, i32 0)
45  call void @llvm.amdgcn.exp.row.f32(i32 13, i32 0, float undef, float undef, float undef, float undef, i1 true, i32 0)
46  ret void
47}
48
; zero_i32: exports the i32 constant 0 in the first three sources (second
; operand 7) and leaves the fourth undef. The expected code materializes 0
; once in v0 and reuses it for all three enabled slots; the fourth slot is
; "off". Row 0 is written to m0 once and shared by both exports.
49define amdgpu_kernel void @zero_i32() #0 {
50; GFX11-LABEL: zero_i32:
51; GFX11:       ; %bb.0:
52; GFX11-NEXT:    v_mov_b32_e32 v0, 0
53; GFX11-NEXT:    s_mov_b32 m0, 0
54; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
55; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
56; GFX11-NEXT:    s_endpgm
57;
58; GFX12-LABEL: zero_i32:
59; GFX12:       ; %bb.0:
60; GFX12-NEXT:    v_mov_b32_e32 v0, 0
61; GFX12-NEXT:    s_mov_b32 m0, 0
62; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
63; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
64; GFX12-NEXT:    s_endpgm
  ; Target 12 -> pos0 (not done), target 13 -> pos1 (done); both use row 0.
65  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 false, i32 0)
66  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 true, i32 0)
67  ret void
68}
69
; one_f32: float counterpart of zero_i32 using the constant 1.0. The first
; three sources are enabled (second operand 7) and share a single v0 holding
; 1.0; the undef fourth source is "off". Row 0 is written to m0 once.
70define amdgpu_kernel void @one_f32() #0 {
71; GFX11-LABEL: one_f32:
72; GFX11:       ; %bb.0:
73; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
74; GFX11-NEXT:    s_mov_b32 m0, 0
75; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
76; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
77; GFX11-NEXT:    s_endpgm
78;
79; GFX12-LABEL: one_f32:
80; GFX12:       ; %bb.0:
81; GFX12-NEXT:    v_mov_b32_e32 v0, 1.0
82; GFX12-NEXT:    s_mov_b32 m0, 0
83; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
84; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
85; GFX12-NEXT:    s_endpgm
  ; Target 12 -> pos0 (not done), target 13 -> pos1 (done); both use row 0.
86  call void @llvm.amdgcn.exp.row.f32(i32 12, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 false, i32 0)
87  call void @llvm.amdgcn.exp.row.f32(i32 13, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 true, i32 0)
88  ret void
89}
90
; id_i32: exports a non-constant value. The result of workitem.id.x is the
; only enabled source (second operand 1) of a single "done" export to pos0
; with constant row 0. The expected code masks v0 with 0x3ff to obtain the
; id and exports that register directly.
91define amdgpu_kernel void @id_i32() #0 {
92; GFX11-LABEL: id_i32:
93; GFX11:       ; %bb.0:
94; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
95; GFX11-NEXT:    s_mov_b32 m0, 0
96; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
97; GFX11-NEXT:    s_endpgm
98;
99; GFX12-LABEL: id_i32:
100; GFX12:       ; %bb.0:
101; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
102; GFX12-NEXT:    s_mov_b32 m0, 0
103; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
104; GFX12-NEXT:    s_endpgm
  ; %id is the exported data here; the row operand stays the constant 0.
105  %id = call i32 @llvm.amdgcn.workitem.id.x()
106  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
107  ret void
108}
109
; id_arg_i32: same export as id_i32, but the row number comes from the kernel
; argument %row instead of a constant. The expected code loads the argument
; into s0 with s_load_b32, waits for the load (s_waitcnt lgkmcnt(0) on GFX11,
; s_wait_kmcnt 0x0 on GFX12), and copies s0 into m0 before the export.
110define amdgpu_kernel void @id_arg_i32(i32 %row) #0 {
111; GFX11-LABEL: id_arg_i32:
112; GFX11:       ; %bb.0:
113; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x24
114; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
115; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
116; GFX11-NEXT:    s_mov_b32 m0, s0
117; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
118; GFX11-NEXT:    s_endpgm
119;
120; GFX12-LABEL: id_arg_i32:
121; GFX12:       ; %bb.0:
122; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x24
123; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
124; GFX12-NEXT:    s_wait_kmcnt 0x0
125; GFX12-NEXT:    s_mov_b32 m0, s0
126; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
127; GFX12-NEXT:    s_endpgm
  ; Data is the workitem id; the row operand is the kernel argument %row.
128  %id = call i32 @llvm.amdgcn.workitem.id.x()
129  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 %row)
130  ret void
131}
132
133; Divergent row number just causes a readfirstlane for now.
134define amdgpu_kernel void @id_row_i32() #0 {
135; GFX11-SDAG-LABEL: id_row_i32:
136; GFX11-SDAG:       ; %bb.0:
137; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
138; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
139; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
140; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0x63
141; GFX11-SDAG-NEXT:    s_mov_b32 m0, s0
142; GFX11-SDAG-NEXT:    exp pos0 v0, off, off, off done row_en
143; GFX11-SDAG-NEXT:    s_endpgm
144;
145; GFX11-GISEL-LABEL: id_row_i32:
146; GFX11-GISEL:       ; %bb.0:
147; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
148; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
149; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
150; GFX11-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
151; GFX11-GISEL-NEXT:    exp pos0 v1, off, off, off done row_en
152; GFX11-GISEL-NEXT:    s_endpgm
153;
154; GFX12-SDAG-LABEL: id_row_i32:
155; GFX12-SDAG:       ; %bb.0:
156; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
157; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
158; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
159; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0x63
160; GFX12-SDAG-NEXT:    s_mov_b32 m0, s0
161; GFX12-SDAG-NEXT:    export pos0 v0, off, off, off done row_en
162; GFX12-SDAG-NEXT:    s_endpgm
163;
164; GFX12-GISEL-LABEL: id_row_i32:
165; GFX12-GISEL:       ; %bb.0:
166; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
167; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
168; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
169; GFX12-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
170; GFX12-GISEL-NEXT:    export pos0 v1, off, off, off done row_en
171; GFX12-GISEL-NEXT:    s_endpgm
  ; Here the workitem id is the ROW operand and the exported data is the
  ; constant 99 (0x63 in the checks). The two selectors differ, hence the
  ; separate SDAG/GISEL prefixes: SelectionDAG reads the first lane into s0
  ; and then copies s0 to m0, while GlobalISel reads the first lane straight
  ; into m0 and keeps the constant in v1.
172  %id = call i32 @llvm.amdgcn.workitem.id.x()
173  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 99, i32 undef, i32 undef, i32 undef, i1 true, i32 %id)
174  ret void
175}
176