xref: /llvm-project/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll (revision 25506f48643b65e48c6bd501855589fff8983933)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=hawaii %s -o - | FileCheck %s
3
4; For all these tests we disable optimizations through function attributes
5; because the code we are exercising here needs phis and we want to keep the
6; IR small.
7
8; This code used to crash in SDISel because f16 was promoted to f32 through
9; a `f32 = vector_extract_elt <1 x f16>, i32 0`, which is illegal.
10; The invalid SDNode, and thus the crash, was only exposed by constant
11; folding.
12define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
13; CHECK-LABEL: phi_vec1half_to_f32_with_const_folding:
14; CHECK:       ; %bb.0: ; %entry
15; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; CHECK-NEXT:    s_mov_b32 s4, 0
17; CHECK-NEXT:  ; %bb.1: ; %bb
18; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
19; CHECK-NEXT:    s_mov_b32 s7, 0xf000
20; CHECK-NEXT:    s_mov_b32 s6, 0
21; CHECK-NEXT:    s_mov_b32 s4, s6
22; CHECK-NEXT:    s_mov_b32 s5, s6
23; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
24; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
25; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
26; CHECK-NEXT:    s_waitcnt vmcnt(0)
27; CHECK-NEXT:    s_setpc_b64 s[30:31]
28entry:
  ; Branch to a separate block so that the vector below is defined by a phi.
29  br label %bb
30
31bb:
  ; Single-entry phi whose only incoming value is the constant zero vector;
  ; this is the constant-folding variant of the test.
32  %phi = phi <1 x half> [ zeroinitializer, %entry ]
  ; Widen to <2 x half>: result element 0 is taken from the poison operand and
  ; result element 1 is the phi's single element.
33  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  ; Write the widened vector to the global (addrspace(1)) pointer %dst.
34  store <2 x half> %res, ptr addrspace(1) %dst
35  ret void
36}
37
38; Same as phi_vec1half_to_f32_with_const_folding but without the folding.
39; This test exercises the same invalid SDNode, but it happened to work by
40; accident before. Here we make sure the fix also works as expected in the
41; non-constant-folding case.
42define void @phi_vec1half_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
43; CHECK-LABEL: phi_vec1half_to_f32:
44; CHECK:       ; %bb.0: ; %entry
45; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; CHECK-NEXT:    s_mov_b32 s7, 0xf000
47; CHECK-NEXT:    s_mov_b32 s6, 0
48; CHECK-NEXT:    s_mov_b32 s4, s6
49; CHECK-NEXT:    s_mov_b32 s5, s6
50; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
51; CHECK-NEXT:    s_waitcnt vmcnt(0)
52; CHECK-NEXT:    v_cvt_f32_f16_e64 v0, v0
53; CHECK-NEXT:  ; %bb.1: ; %bb
54; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, v0
55; CHECK-NEXT:    s_mov_b32 s7, 0xf000
56; CHECK-NEXT:    s_mov_b32 s6, 0
57; CHECK-NEXT:    s_mov_b32 s4, s6
58; CHECK-NEXT:    s_mov_b32 s5, s6
59; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
60; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, s4
61; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
62; CHECK-NEXT:    s_waitcnt vmcnt(0)
63; CHECK-NEXT:    s_setpc_b64 s[30:31]
64entry:
  ; Load the <1 x half> from %src so that the phi input is not a constant.
65  %input = load <1 x half>, ptr addrspace(1) %src
  ; Branch to a separate block so that the vector below is defined by a phi.
66  br label %bb
67
68bb:
  ; Single-entry phi fed by the value loaded in %entry.
69  %phi = phi <1 x half> [ %input, %entry ]
  ; Widen to <2 x half>: result element 0 is taken from the poison operand and
  ; result element 1 is the phi's single element.
70  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  ; Write the widened vector to the global (addrspace(1)) pointer %dst.
71  store <2 x half> %res, ptr addrspace(1) %dst
72  ret void
73}
74
75; Same as phi_vec1half_to_f32 but with bfloat instead of half.
76define void @phi_vec1bf16_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
77; CHECK-LABEL: phi_vec1bf16_to_f32:
78; CHECK:       ; %bb.0: ; %entry
79; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; CHECK-NEXT:    s_mov_b32 s7, 0xf000
81; CHECK-NEXT:    s_mov_b32 s6, 0
82; CHECK-NEXT:    s_mov_b32 s4, s6
83; CHECK-NEXT:    s_mov_b32 s5, s6
84; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
85; CHECK-NEXT:    s_mov_b32 s4, 16
86; CHECK-NEXT:    s_waitcnt vmcnt(0)
87; CHECK-NEXT:    v_lshlrev_b32_e64 v0, s4, v0
88; CHECK-NEXT:  ; %bb.1: ; %bb
89; CHECK-NEXT:    v_mul_f32_e64 v0, 1.0, v0
90; CHECK-NEXT:    s_mov_b32 s4, 16
91; CHECK-NEXT:    v_lshrrev_b32_e64 v0, s4, v0
92; CHECK-NEXT:    s_mov_b32 s7, 0xf000
93; CHECK-NEXT:    s_mov_b32 s6, 0
94; CHECK-NEXT:    s_mov_b32 s4, s6
95; CHECK-NEXT:    s_mov_b32 s5, s6
96; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
97; CHECK-NEXT:    s_waitcnt vmcnt(0)
98; CHECK-NEXT:    s_setpc_b64 s[30:31]
99entry:
  ; Load the <1 x bfloat> from %src so that the phi input is not a constant.
100  %input = load <1 x bfloat>, ptr addrspace(1) %src
  ; Branch to a separate block so that the vector below is defined by a phi.
101  br label %bb
102
103bb:
  ; Single-entry phi fed by the value loaded in %entry.
104  %phi = phi <1 x bfloat> [ %input, %entry ]
  ; Widen to <2 x bfloat>: result element 0 is taken from the poison operand
  ; and result element 1 is the phi's single element.
105  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  ; Write the widened vector to the global (addrspace(1)) pointer %dst.
106  store <2 x bfloat> %res, ptr addrspace(1) %dst
107  ret void
108}
109
110; Same as phi_vec1half_to_f32_with_const_folding but with bfloat instead of half.
111define void @phi_vec1bf16_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
112; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
113; CHECK:       ; %bb.0: ; %entry
114; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; CHECK-NEXT:    s_mov_b32 s4, 0
116; CHECK-NEXT:  ; %bb.1: ; %bb
117; CHECK-NEXT:    v_mul_f32_e64 v2, 1.0, s4
118; CHECK-NEXT:    s_mov_b32 s4, 16
119; CHECK-NEXT:    v_lshrrev_b32_e64 v2, s4, v2
120; CHECK-NEXT:    s_mov_b32 s7, 0xf000
121; CHECK-NEXT:    s_mov_b32 s6, 0
122; CHECK-NEXT:    s_mov_b32 s4, s6
123; CHECK-NEXT:    s_mov_b32 s5, s6
124; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
125; CHECK-NEXT:    s_waitcnt vmcnt(0)
126; CHECK-NEXT:    s_setpc_b64 s[30:31]
127entry:
  ; Branch to a separate block so that the vector below is defined by a phi.
128  br label %bb
129
130bb:
  ; Single-entry phi whose only incoming value is the constant zero vector;
  ; this is the constant-folding variant of the bfloat test.
131  %phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
  ; Widen to <2 x bfloat>: result element 0 is taken from the poison operand
  ; and result element 1 is the phi's single element.
132  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  ; Write the widened vector to the global (addrspace(1)) pointer %dst.
133  store <2 x bfloat> %res, ptr addrspace(1) %dst
134  ret void
135}
; Attribute group used by the test functions above. optnone disables
; optimizations so the phis are kept (see the note at the top of the file);
; LLVM requires noinline to be specified together with optnone.
137attributes #0 = { noinline optnone }
138