xref: /llvm-project/llvm/test/CodeGen/AMDGPU/bfe-combine.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck %s --check-prefixes=VI
3; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck %s --check-prefixes=VI-SDWA
4; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck %s --check-prefixes=CI
5
; bfe_combine8: an 8-bit field extract whose result is used as an array index.
;
; The IR adds the workitem id to %x, takes bits [15:8] of the sum
; (lshr 8, and 255) and uses that value as an i32 element index into %arg.
; The loaded element is then stored to the address in %arg itself.
;
; Selection expected by the autogenerated assertions below:
;   * VI prefix (fiji, SDWA peephole disabled): v_bfe_u32 with offset 8 and
;     width 8, followed by a separate left shift by 2.
;   * VI-SDWA prefix (fiji, default options): one v_lshlrev_b32_sdwa whose
;     shifted operand is selected with BYTE_1; the shift amount 2 is in v1.
;   * CI prefix (bonaire): no BFE instruction; a right shift by 6 followed by
;     a mask with 0x3fc.
6define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x) {
7; VI-LABEL: bfe_combine8:
8; VI:       ; %bb.0:
9; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
10; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
11; VI-NEXT:    s_waitcnt lgkmcnt(0)
12; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
13; VI-NEXT:    v_bfe_u32 v0, v0, 8, 8
14; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
15; VI-NEXT:    v_mov_b32_e32 v1, s1
16; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
17; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
18; VI-NEXT:    flat_load_dword v2, v[0:1]
19; VI-NEXT:    v_mov_b32_e32 v0, s0
20; VI-NEXT:    v_mov_b32_e32 v1, s1
21; VI-NEXT:    s_waitcnt vmcnt(0)
22; VI-NEXT:    flat_store_dword v[0:1], v2
23; VI-NEXT:    s_endpgm
24;
25; VI-SDWA-LABEL: bfe_combine8:
26; VI-SDWA:       ; %bb.0:
27; VI-SDWA-NEXT:    s_load_dword s2, s[4:5], 0x2c
28; VI-SDWA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
29; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 2
30; VI-SDWA-NEXT:    s_waitcnt lgkmcnt(0)
31; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
32; VI-SDWA-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
33; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
34; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
35; VI-SDWA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
36; VI-SDWA-NEXT:    flat_load_dword v2, v[0:1]
37; VI-SDWA-NEXT:    v_mov_b32_e32 v0, s0
38; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
39; VI-SDWA-NEXT:    s_waitcnt vmcnt(0)
40; VI-SDWA-NEXT:    flat_store_dword v[0:1], v2
41; VI-SDWA-NEXT:    s_endpgm
42;
43; CI-LABEL: bfe_combine8:
44; CI:       ; %bb.0:
45; CI-NEXT:    s_load_dword s2, s[4:5], 0xb
46; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
47; CI-NEXT:    s_mov_b32 s3, 0xf000
48; CI-NEXT:    s_mov_b32 s6, 0
49; CI-NEXT:    s_mov_b32 s7, s3
50; CI-NEXT:    s_waitcnt lgkmcnt(0)
51; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
52; CI-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
53; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
54; CI-NEXT:    v_and_b32_e32 v0, 0x3fc, v0
55; CI-NEXT:    v_mov_b32_e32 v1, 0
56; CI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
57; CI-NEXT:    s_mov_b32 s2, -1
58; CI-NEXT:    s_waitcnt vmcnt(0)
59; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
60; CI-NEXT:    s_endpgm
  ; idx = %x + workitem id
61  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
62  %idx = add i32 %x, %id
  ; Bits [15:8] of idx: the 8-bit field the test wants extracted.
63  %srl = lshr i32 %idx, 8
64  %and = and i32 %srl, 255
  ; i32 GEP: the extracted field is an element index, hence the extra
  ; shift by 2 (or the folded 6 / 0x3fc form) in the expected output.
65  %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and
66  %val = load i32, ptr addrspace(1) %ptr, align 4
67  store i32 %val, ptr addrspace(1) %arg, align 4
68  ret void
69}
70
; bfe_combine16: a 16-bit field extract expressed as shift-right + wide mask.
;
; The IR adds the workitem id to %x, shifts the sum right by 1 and masks it
; with 2147450880 (0x7fff8000). That keeps bits [31:16] of the sum, placed at
; bits [30:15] -- i.e. the upper 16-bit field shifted left by 15. The value is
; then used as an i32 element index into %arg; the loaded element is stored
; to the address in %arg itself.
;
; Selection expected by the autogenerated assertions below:
;   * VI prefix (fiji, SDWA peephole disabled): v_bfe_u32 with offset 16 and
;     width 16, then a 32-bit left shift by 15, then a 64-bit left shift by 2
;     before the address add.
;   * VI-SDWA prefix (fiji, default options): one v_lshlrev_b32_sdwa whose
;     shifted operand is selected with WORD_1 (shift amount 15 in v1), then
;     the same 64-bit left shift by 2.
;   * CI prefix (bonaire): no BFE instruction; right shift by 1, mask with
;     0x7fff8000, then a 64-bit left shift by 2.
71define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x) {
72; VI-LABEL: bfe_combine16:
73; VI:       ; %bb.0:
74; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
75; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
76; VI-NEXT:    v_mov_b32_e32 v1, 0
77; VI-NEXT:    s_waitcnt lgkmcnt(0)
78; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
79; VI-NEXT:    v_bfe_u32 v0, v0, 16, 16
80; VI-NEXT:    v_lshlrev_b32_e32 v0, 15, v0
81; VI-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
82; VI-NEXT:    v_mov_b32_e32 v2, s1
83; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
84; VI-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
85; VI-NEXT:    flat_load_dword v2, v[0:1]
86; VI-NEXT:    v_mov_b32_e32 v0, s0
87; VI-NEXT:    v_mov_b32_e32 v1, s1
88; VI-NEXT:    s_waitcnt vmcnt(0)
89; VI-NEXT:    flat_store_dword v[0:1], v2
90; VI-NEXT:    s_endpgm
91;
92; VI-SDWA-LABEL: bfe_combine16:
93; VI-SDWA:       ; %bb.0:
94; VI-SDWA-NEXT:    s_load_dword s2, s[4:5], 0x2c
95; VI-SDWA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
96; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 15
97; VI-SDWA-NEXT:    s_waitcnt lgkmcnt(0)
98; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
99; VI-SDWA-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
100; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 0
101; VI-SDWA-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
102; VI-SDWA-NEXT:    v_mov_b32_e32 v2, s1
103; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
104; VI-SDWA-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
105; VI-SDWA-NEXT:    flat_load_dword v2, v[0:1]
106; VI-SDWA-NEXT:    v_mov_b32_e32 v0, s0
107; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
108; VI-SDWA-NEXT:    s_waitcnt vmcnt(0)
109; VI-SDWA-NEXT:    flat_store_dword v[0:1], v2
110; VI-SDWA-NEXT:    s_endpgm
111;
112; CI-LABEL: bfe_combine16:
113; CI:       ; %bb.0:
114; CI-NEXT:    s_load_dword s2, s[4:5], 0xb
115; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
116; CI-NEXT:    v_mov_b32_e32 v1, 0
117; CI-NEXT:    s_mov_b32 s3, 0xf000
118; CI-NEXT:    s_mov_b32 s6, 0
119; CI-NEXT:    s_waitcnt lgkmcnt(0)
120; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
121; CI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
122; CI-NEXT:    v_and_b32_e32 v0, 0x7fff8000, v0
123; CI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
124; CI-NEXT:    s_mov_b32 s7, s3
125; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
126; CI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
127; CI-NEXT:    s_mov_b32 s2, -1
128; CI-NEXT:    s_waitcnt vmcnt(0)
129; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
130; CI-NEXT:    s_endpgm
  ; idx = %x + workitem id
131  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
132  %idx = add i32 %x, %id
  ; 2147450880 == 0x7fff8000: after the shift by 1 this keeps idx[31:16],
  ; landing at bits [30:15] of %and.
133  %srl = lshr i32 %idx, 1
134  %and = and i32 %srl, 2147450880
  ; i32 GEP indexed by the masked value.
135  %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and
136  %val = load i32, ptr addrspace(1) %ptr, align 4
137  store i32 %val, ptr addrspace(1) %arg, align 4
138  ret void
139}
140
; Declaration of the AMDGPU workitem-id (X dimension) intrinsic. Both kernels
; in this file call it and add the result to their %x argument.
; NOTE(review): the call sites reference attribute group #2 while this
; declaration references #1; neither group's definition is visible in this
; part of the file -- confirm both are defined.
141declare i32 @llvm.amdgcn.workitem.id.x() #1
142