xref: /llvm-project/llvm/test/CodeGen/AMDGPU/bfm.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
3; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
4
; s_bfm_pattern: amdgpu_kernel that computes ((1 << %x) - 1) << %y and stores
; the 32-bit result through %out.
; The assertions below expect the whole shl/sub/shl chain to be selected as a
; single scalar s_bfm_b32 under both check prefixes (default target and
; -mcpu=tonga), with no separate shift or subtract instructions emitted.
5define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
6; SI-LABEL: s_bfm_pattern:
7; SI:       ; %bb.0:
8; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9; SI-NEXT:    s_mov_b32 s7, 0xf000
10; SI-NEXT:    s_waitcnt lgkmcnt(0)
11; SI-NEXT:    s_bfm_b32 s2, s2, s3
12; SI-NEXT:    s_mov_b32 s6, -1
13; SI-NEXT:    s_mov_b32 s4, s0
14; SI-NEXT:    s_mov_b32 s5, s1
15; SI-NEXT:    v_mov_b32_e32 v0, s2
16; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
17; SI-NEXT:    s_endpgm
18;
19; VI-LABEL: s_bfm_pattern:
20; VI:       ; %bb.0:
21; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
22; VI-NEXT:    s_waitcnt lgkmcnt(0)
23; VI-NEXT:    s_bfm_b32 s2, s2, s3
24; VI-NEXT:    v_mov_b32_e32 v0, s0
25; VI-NEXT:    v_mov_b32_e32 v1, s1
26; VI-NEXT:    v_mov_b32_e32 v2, s2
27; VI-NEXT:    flat_store_dword v[0:1], v2
28; VI-NEXT:    s_endpgm
29  %a = shl i32 1, %x
30  %b = sub i32 %a, 1
31  %c = shl i32 %b, %y
32  store i32 %c, ptr addrspace(1) %out
33  ret void
34}
35
; s_bfm_pattern_simple: amdgpu_kernel that computes (1 << %x) - 1 (no trailing
; shift) and stores the 32-bit result through %out.
; The assertions below expect a scalar s_bfm_b32 whose second source operand
; is the literal 0 under both check prefixes.
36define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
37; SI-LABEL: s_bfm_pattern_simple:
38; SI:       ; %bb.0:
39; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
40; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
41; SI-NEXT:    s_mov_b32 s3, 0xf000
42; SI-NEXT:    s_waitcnt lgkmcnt(0)
43; SI-NEXT:    s_bfm_b32 s4, s2, 0
44; SI-NEXT:    s_mov_b32 s2, -1
45; SI-NEXT:    v_mov_b32_e32 v0, s4
46; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
47; SI-NEXT:    s_endpgm
48;
49; VI-LABEL: s_bfm_pattern_simple:
50; VI:       ; %bb.0:
51; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
52; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
53; VI-NEXT:    s_waitcnt lgkmcnt(0)
54; VI-NEXT:    s_bfm_b32 s2, s2, 0
55; VI-NEXT:    v_mov_b32_e32 v0, s0
56; VI-NEXT:    v_mov_b32_e32 v1, s1
57; VI-NEXT:    v_mov_b32_e32 v2, s2
58; VI-NEXT:    flat_store_dword v[0:1], v2
59; VI-NEXT:    s_endpgm
60  %a = shl i32 1, %x
61  %b = sub i32 %a, 1
62  store i32 %b, ptr addrspace(1) %out
63  ret void
64}
65
; v_bfm_pattern: same ((1 << %x) - 1) << %y computation as s_bfm_pattern, but
; declared as a plain (non-kernel) function, so the expected code returns with
; s_setpc_b64 instead of ending the program.
; The assertions below expect the chain to be selected as a single vector
; v_bfm_b32 (printed with the _e32 suffix under the first check prefix) whose
; result is stored through %out.
66define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
67; SI-LABEL: v_bfm_pattern:
68; SI:       ; %bb.0:
69; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; SI-NEXT:    s_mov_b32 s7, 0xf000
71; SI-NEXT:    s_mov_b32 s6, 0
72; SI-NEXT:    v_bfm_b32_e32 v2, v2, v3
73; SI-NEXT:    s_mov_b32 s4, s6
74; SI-NEXT:    s_mov_b32 s5, s6
75; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
76; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
77; SI-NEXT:    s_setpc_b64 s[30:31]
78;
79; VI-LABEL: v_bfm_pattern:
80; VI:       ; %bb.0:
81; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; VI-NEXT:    v_bfm_b32 v2, v2, v3
83; VI-NEXT:    flat_store_dword v[0:1], v2
84; VI-NEXT:    s_waitcnt vmcnt(0)
85; VI-NEXT:    s_setpc_b64 s[30:31]
86  %a = shl i32 1, %x
87  %b = sub i32 %a, 1
88  %c = shl i32 %b, %y
89  store i32 %c, ptr addrspace(1) %out
90  ret void
91}
92
; v_bfm_pattern_simple: plain (non-kernel) function computing (1 << %x) - 1
; and storing the 32-bit result through %out.
; The assertions below expect a vector v_bfm_b32 whose second source operand
; is the literal 0; under the first check prefix the instruction is printed
; with the _e64 suffix.
93define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
94; SI-LABEL: v_bfm_pattern_simple:
95; SI:       ; %bb.0:
96; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; SI-NEXT:    s_mov_b32 s7, 0xf000
98; SI-NEXT:    s_mov_b32 s6, 0
99; SI-NEXT:    v_bfm_b32_e64 v2, v2, 0
100; SI-NEXT:    s_mov_b32 s4, s6
101; SI-NEXT:    s_mov_b32 s5, s6
102; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
103; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
104; SI-NEXT:    s_setpc_b64 s[30:31]
105;
106; VI-LABEL: v_bfm_pattern_simple:
107; VI:       ; %bb.0:
108; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; VI-NEXT:    v_bfm_b32 v2, v2, 0
110; VI-NEXT:    flat_store_dword v[0:1], v2
111; VI-NEXT:    s_waitcnt vmcnt(0)
112; VI-NEXT:    s_setpc_b64 s[30:31]
113  %a = shl i32 1, %x
114  %b = sub i32 %a, 1
115  store i32 %b, ptr addrspace(1) %out
116  ret void
117}
118
; Attribute group #0 is attached to all four test functions in this file and
; marks them nounwind.
119attributes #0 = { nounwind }
120