; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s | FileCheck -check-prefix=RRLIST %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s | FileCheck -check-prefix=FAST %s
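
; The checks below require that s_cmp_eq_u64 is immediately followed by the
; s_cselect_b32 that consumes SCC, i.e. no SCC-clobbering copy is scheduled
; between the compare and the select with either pre-RA scheduler.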


define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
; RRLIST-LABEL: sccClobber:
; RRLIST:       ; %bb.0: ; %entry
; RRLIST-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; RRLIST-NEXT:    v_mov_b32_e32 v2, 0
; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
; RRLIST-NEXT:    s_load_dword s16, s[12:13], 0x0
; RRLIST-NEXT:    s_load_dwordx2 s[0:1], s[10:11], 0x0
; RRLIST-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
; RRLIST-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x44
; RRLIST-NEXT:    s_load_dword s17, s[14:15], 0x0
; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
; RRLIST-NEXT:    s_min_i32 s8, s16, 0
; RRLIST-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; RRLIST-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
; RRLIST-NEXT:    s_and_b64 s[4:5], vcc, exec
; RRLIST-NEXT:    s_cselect_b32 s4, s16, s17
; RRLIST-NEXT:    s_cmp_eq_u64 s[2:3], s[0:1]
; RRLIST-NEXT:    s_cselect_b32 s0, s8, s4
; RRLIST-NEXT:    v_mov_b32_e32 v0, s0
; RRLIST-NEXT:    global_store_dword v2, v0, s[6:7]
; RRLIST-NEXT:    s_endpgm
;
; FAST-LABEL: sccClobber:
; FAST:       ; %bb.0: ; %entry
; FAST-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; FAST-NEXT:    v_mov_b32_e32 v2, 0
; FAST-NEXT:    s_waitcnt lgkmcnt(0)
; FAST-NEXT:    s_load_dword s16, s[12:13], 0x0
; FAST-NEXT:    s_load_dwordx2 s[0:1], s[10:11], 0x0
; FAST-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
; FAST-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x44
; FAST-NEXT:    s_load_dword s17, s[14:15], 0x0
; FAST-NEXT:    s_waitcnt lgkmcnt(0)
; FAST-NEXT:    s_min_i32 s8, s16, 0
; FAST-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; FAST-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
; FAST-NEXT:    s_and_b64 s[4:5], vcc, exec
; FAST-NEXT:    s_cselect_b32 s4, s16, s17
; FAST-NEXT:    s_cmp_eq_u64 s[2:3], s[0:1]
; FAST-NEXT:    s_cselect_b32 s0, s8, s4
; FAST-NEXT:    v_mov_b32_e32 v0, s0
; FAST-NEXT:    global_store_dword v2, v0, s[6:7]
; FAST-NEXT:    s_endpgm
entry:
  %i = load i64, ptr addrspace(1) %a, align 8
  %i.1 = load i64, ptr addrspace(1) %b, align 8
  %i.2 = load i32, ptr addrspace(1) %e, align 4
  %i.3 = load i32, ptr addrspace(1) %f, align 4
  %cmp7.1 = icmp eq i64 %i, %i.1
  %call.1 = tail call noundef i32 @llvm.smin.i32(i32 noundef 0, i32 noundef %i.2)
  %cmp8.1 = icmp slt i64 %i, %i.1
  %cond.1 = select i1 %cmp8.1, i32 %i.2, i32 %i.3
  %cond14.1 = select i1 %cmp7.1, i32 %call.1, i32 %cond.1
  store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
  ret void
}

declare i32 @llvm.smin.i32(i32, i32)
