; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=hawaii %s -o - | FileCheck %s

; For all these tests we disable optimizations through function attributes
; because the code we are exercising here needs phis and we want to keep the
; IR small.

; This code used to crash in SDISel because f16 was promoted to f32 through
; a `f32 = vector_extract_elt <1 x f16>, i32 0`, which is illegal.
; The invalid SDNode, and thus the crash, was only exposed by constant
; folding.
define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
; CHECK-LABEL: phi_vec1half_to_f32_with_const_folding:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  ; %bb.1: ; %bb
; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %bb

bb:
  ; Single-input phi whose incoming value is a constant <1 x half>; this is
  ; the constant that makes the folding mentioned above possible.
  %phi = phi <1 x half> [ zeroinitializer, %entry ]
  ; Widen to two lanes: mask index 0 selects the poison element and mask
  ; index 1 selects the single element of %phi.
  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  store <2 x half> %res, ptr addrspace(1) %dst
  ret void
}

; Same as phi_vec1half_to_f32_with_const_folding but without the folding.
; This test exercises the same invalid SDNode, but it happened to work by
; accident before. Here we make sure the fix also works as expected in the
; non-constant folding case.
define void @phi_vec1half_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
; CHECK-LABEL: phi_vec1half_to_f32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cvt_f32_f16_e64 v0, v0
; CHECK-NEXT:  ; %bb.1: ; %bb
; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, v0
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
; CHECK-NEXT:    v_cvt_f16_f32_e64 v0, s4
; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The phi input is loaded from memory, so it is not a compile-time constant.
  %input = load <1 x half>, ptr addrspace(1) %src
  br label %bb

bb:
  ; Single-input phi carrying the loaded <1 x half> across the block boundary.
  %phi = phi <1 x half> [ %input, %entry ]
  ; Widen to two lanes: mask index 0 selects the poison element and mask
  ; index 1 selects the single element of %phi.
  %res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
  store <2 x half> %res, ptr addrspace(1) %dst
  ret void
}

; Same as phi_vec1half_to_f32 but with bfloat instead of half.
define void @phi_vec1bf16_to_f32(ptr addrspace(1) %src, ptr addrspace(1) %dst) #0 {
; CHECK-LABEL: phi_vec1bf16_to_f32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
; CHECK-NEXT:    s_mov_b32 s4, 16
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e64 v0, s4, v0
; CHECK-NEXT:  ; %bb.1: ; %bb
; CHECK-NEXT:    v_mul_f32_e64 v0, 1.0, v0
; CHECK-NEXT:    s_mov_b32 s4, 16
; CHECK-NEXT:    v_lshrrev_b32_e64 v0, s4, v0
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_store_short v0, v[2:3], s[4:7], 0 addr64 offset:2
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The phi input is loaded from memory, so it is not a compile-time constant.
  %input = load <1 x bfloat>, ptr addrspace(1) %src
  br label %bb

bb:
  ; Single-input phi carrying the loaded <1 x bfloat> across the block boundary.
  %phi = phi <1 x bfloat> [ %input, %entry ]
  ; Widen to two lanes: mask index 0 selects the poison element and mask
  ; index 1 selects the single element of %phi.
  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  store <2 x bfloat> %res, ptr addrspace(1) %dst
  ret void
}

; Same as phi_vec1half_to_f32_with_const_folding but with bfloat instead of half.
define void @phi_vec1bf16_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  ; %bb.1: ; %bb
; CHECK-NEXT:    v_mul_f32_e64 v2, 1.0, s4
; CHECK-NEXT:    s_mov_b32 s4, 16
; CHECK-NEXT:    v_lshrrev_b32_e64 v2, s4, v2
; CHECK-NEXT:    s_mov_b32 s7, 0xf000
; CHECK-NEXT:    s_mov_b32 s6, 0
; CHECK-NEXT:    s_mov_b32 s4, s6
; CHECK-NEXT:    s_mov_b32 s5, s6
; CHECK-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64 offset:2
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %bb

bb:
  ; Single-input phi whose incoming value is a constant <1 x bfloat>.
  %phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
  ; Widen to two lanes: mask index 0 selects the poison element and mask
  ; index 1 selects the single element of %phi.
  %res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
  store <2 x bfloat> %res, ptr addrspace(1) %dst
  ret void
}

; Optimizations are disabled on every test function so that the phis survive
; until instruction selection.
attributes #0 = { noinline optnone }