xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
7
; Non-kernel function with a variable i32 operand: forwards %a and %b
; unchanged to llvm.amdgcn.trig.preop.f64 and returns the call's result.
; The assertions below (GCN and GFX10PLUS prefix groups) expect the same
; three-instruction body: the s_waitcnt, a single v_trig_preop_f64 taking
; v[0:1] and v2 as sources, and the s_setpc_b64 return.
8define double @v_trig_preop_f64(double %a, i32 %b) {
9; GCN-LABEL: v_trig_preop_f64:
10; GCN:       ; %bb.0:
11; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], v2
13; GCN-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX10PLUS-LABEL: v_trig_preop_f64:
16; GFX10PLUS:       ; %bb.0:
17; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX10PLUS-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], v2
19; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
20  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
21  ret double %result
22}
23
; Non-kernel function with a constant i32 operand: the %b parameter is
; declared but not used; the intrinsic is called with the constant i32 7.
; The assertions below (GCN and GFX10PLUS prefix groups) expect that constant
; to appear directly as the last operand of v_trig_preop_f64, with no
; separate instruction materializing it.
24define double @v_trig_preop_f64_imm(double %a, i32 %b) {
25; GCN-LABEL: v_trig_preop_f64_imm:
26; GCN:       ; %bb.0:
27; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], 7
29; GCN-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
32; GFX10PLUS:       ; %bb.0:
33; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX10PLUS-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], 7
35; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
36  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
37  ret double %result
38}
39
; Kernel (amdgpu_kernel) variant with a variable i32 operand: calls
; llvm.amdgcn.trig.preop.f64 on the two kernel arguments and writes the
; result with a volatile store to an undef pointer, which keeps the call's
; result in use.
;
; What the per-target assertions below show:
;  - Both arguments are fetched with scalar loads (base s[8:9]; s[4:5] in
;    the GFX11 group) before the s_waitcnt lgkmcnt(0).
;  - In the CI, VI and GFX9 groups the i32 operand is first copied from s2
;    into v0; in the GFX10 and GFX11 groups s2 is used directly as the last
;    operand of v_trig_preop_f64.
;  - In the CI and VI groups the 64-bit result is written as two
;    flat_store_dword instructions; in the GFX9, GFX10 and GFX11 groups it
;    is written as one 64-bit flat store.
40define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
41; CI-LABEL: s_trig_preop_f64:
42; CI:       ; %bb.0:
43; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
44; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
45; CI-NEXT:    s_waitcnt lgkmcnt(0)
46; CI-NEXT:    v_mov_b32_e32 v0, s2
47; CI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
48; CI-NEXT:    s_add_u32 s0, s0, 4
49; CI-NEXT:    s_addc_u32 s1, s1, 0
50; CI-NEXT:    v_mov_b32_e32 v3, s1
51; CI-NEXT:    v_mov_b32_e32 v2, s0
52; CI-NEXT:    flat_store_dword v[0:1], v0
53; CI-NEXT:    s_waitcnt vmcnt(0)
54; CI-NEXT:    flat_store_dword v[2:3], v1
55; CI-NEXT:    s_waitcnt vmcnt(0)
56; CI-NEXT:    s_endpgm
57;
58; VI-LABEL: s_trig_preop_f64:
59; VI:       ; %bb.0:
60; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
61; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
62; VI-NEXT:    s_waitcnt lgkmcnt(0)
63; VI-NEXT:    v_mov_b32_e32 v0, s2
64; VI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
65; VI-NEXT:    s_add_u32 s0, s0, 4
66; VI-NEXT:    s_addc_u32 s1, s1, 0
67; VI-NEXT:    v_mov_b32_e32 v3, s1
68; VI-NEXT:    v_mov_b32_e32 v2, s0
69; VI-NEXT:    flat_store_dword v[0:1], v0
70; VI-NEXT:    s_waitcnt vmcnt(0)
71; VI-NEXT:    flat_store_dword v[2:3], v1
72; VI-NEXT:    s_waitcnt vmcnt(0)
73; VI-NEXT:    s_endpgm
74;
75; GFX9-LABEL: s_trig_preop_f64:
76; GFX9:       ; %bb.0:
77; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x8
78; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
79; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
80; GFX9-NEXT:    v_mov_b32_e32 v0, s2
81; GFX9-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
82; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
83; GFX9-NEXT:    s_waitcnt vmcnt(0)
84; GFX9-NEXT:    s_endpgm
85;
86; GFX10-LABEL: s_trig_preop_f64:
87; GFX10:       ; %bb.0:
88; GFX10-NEXT:    s_clause 0x1
89; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
90; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8
91; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
92; GFX10-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], s2
93; GFX10-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
94; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
95; GFX10-NEXT:    s_endpgm
96;
97; GFX11-LABEL: s_trig_preop_f64:
98; GFX11:       ; %bb.0:
99; GFX11-NEXT:    s_clause 0x1
100; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
101; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8
102; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX11-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], s2
104; GFX11-NEXT:    flat_store_b64 v[0:1], v[0:1] dlc
105; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
106; GFX11-NEXT:    s_endpgm
107  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
108  store volatile double %result, ptr undef
109  ret void
110}
111
; Kernel (amdgpu_kernel) variant with a constant i32 operand: the %b kernel
; argument is declared but not used; the intrinsic is called with the
; constant i32 7 and the result is written with a volatile store to an
; undef pointer.
;
; What the per-target assertions below show:
;  - Only one scalar load is expected (the 64-bit %a value); no load for %b
;    appears in any prefix group.
;  - The constant 7 appears directly as the last operand of
;    v_trig_preop_f64 in every prefix group.
;  - In the CI and VI groups the 64-bit result is written as two
;    flat_store_dword instructions; in the GFX9, GFX10 and GFX11 groups it
;    is written as one 64-bit flat store.
112define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
113; CI-LABEL: s_trig_preop_f64_imm:
114; CI:       ; %bb.0:
115; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
116; CI-NEXT:    s_waitcnt lgkmcnt(0)
117; CI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
118; CI-NEXT:    s_add_u32 s0, s0, 4
119; CI-NEXT:    s_addc_u32 s1, s1, 0
120; CI-NEXT:    v_mov_b32_e32 v3, s1
121; CI-NEXT:    v_mov_b32_e32 v2, s0
122; CI-NEXT:    flat_store_dword v[0:1], v0
123; CI-NEXT:    s_waitcnt vmcnt(0)
124; CI-NEXT:    flat_store_dword v[2:3], v1
125; CI-NEXT:    s_waitcnt vmcnt(0)
126; CI-NEXT:    s_endpgm
127;
128; VI-LABEL: s_trig_preop_f64_imm:
129; VI:       ; %bb.0:
130; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
131; VI-NEXT:    s_waitcnt lgkmcnt(0)
132; VI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
133; VI-NEXT:    s_add_u32 s0, s0, 4
134; VI-NEXT:    s_addc_u32 s1, s1, 0
135; VI-NEXT:    v_mov_b32_e32 v3, s1
136; VI-NEXT:    v_mov_b32_e32 v2, s0
137; VI-NEXT:    flat_store_dword v[0:1], v0
138; VI-NEXT:    s_waitcnt vmcnt(0)
139; VI-NEXT:    flat_store_dword v[2:3], v1
140; VI-NEXT:    s_waitcnt vmcnt(0)
141; VI-NEXT:    s_endpgm
142;
143; GFX9-LABEL: s_trig_preop_f64_imm:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
146; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
147; GFX9-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
148; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
149; GFX9-NEXT:    s_waitcnt vmcnt(0)
150; GFX9-NEXT:    s_endpgm
151;
152; GFX10-LABEL: s_trig_preop_f64_imm:
153; GFX10:       ; %bb.0:
154; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
155; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX10-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
157; GFX10-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
158; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
159; GFX10-NEXT:    s_endpgm
160;
161; GFX11-LABEL: s_trig_preop_f64_imm:
162; GFX11:       ; %bb.0:
163; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
164; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
165; GFX11-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
166; GFX11-NEXT:    flat_store_b64 v[0:1], v[0:1] dlc
167; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
168; GFX11-NEXT:    s_endpgm
169  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
170  store volatile double %result, ptr undef
171  ret void
172}
173
; Declaration of the intrinsic called by the test functions in this file:
; it takes a double and an i32 and returns a double. Attribute group #0,
; referenced by the declaration, is defined on the last line as
; nounwind, readnone and speculatable.
174declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
175
176attributes #0 = { nounwind readnone speculatable }
177