; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -o - %s | FileCheck -check-prefix=GFX950 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -o - %s | FileCheck -check-prefix=GFX950 %s

; Codegen tests for the f32 -> 16-bit conversion intrinsics
; llvm.amdgcn.cvt.sr.bf16.f32 and llvm.amdgcn.cvt.sr.f16.f32 on gfx950.
;
; Each intrinsic takes (old 2 x 16-bit vector, float src0, i32 src1, i1 sel).
; Every test loads the old vector from %out, applies the intrinsic and stores
; the result back to %out. Both invocations above (SelectionDAG and GlobalISel
; with fallback permitted) are checked against the same expected output.

declare <2 x half> @llvm.amdgcn.cvt.sr.f16.f32(<2 x half>, float, i32, i1)
declare <2 x bfloat> @llvm.amdgcn.cvt.sr.bf16.f32(<2 x bfloat>, float, i32, i1)
declare float @llvm.fabs.f32(float)

; bf16, i1 operand false: the expected instruction carries no op_sel modifier.
define amdgpu_ps void @test_cvt_sr_bf16_f32_word_sel_0(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_bf16_f32_word_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_bf16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x bfloat>, ptr addrspace(1) %out, align 4
  %cvt = tail call <2 x bfloat> @llvm.amdgcn.cvt.sr.bf16.f32(<2 x bfloat> %old, float %src0, i32 %src1, i1 false)
  store <2 x bfloat> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; bf16, i1 operand true: the expected instruction carries op_sel:[0,0,1].
define amdgpu_ps void @test_cvt_sr_bf16_f32_word_sel_1(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_bf16_f32_word_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_bf16_f32 v4, v2, v3 op_sel:[0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x bfloat>, ptr addrspace(1) %out, align 4
  %cvt = tail call <2 x bfloat> @llvm.amdgcn.cvt.sr.bf16.f32(<2 x bfloat> %old, float %src0, i32 %src1, i1 true)
  store <2 x bfloat> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; bf16 with fabs on src0: the expected output clears the sign bit with a
; separate v_and_b32 before the conversion.
define amdgpu_ps void @test_cvt_sr_bf16_f32_fabs(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_bf16_f32_fabs:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v2
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_bf16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x bfloat>, ptr addrspace(1) %out, align 4
  %src0.fabs = call float @llvm.fabs.f32(float %src0)
  %cvt = tail call <2 x bfloat> @llvm.amdgcn.cvt.sr.bf16.f32(<2 x bfloat> %old, float %src0.fabs, i32 %src1, i1 false)
  store <2 x bfloat> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; bf16 with fneg on src0: the expected output flips the sign bit with a
; separate v_xor_b32 before the conversion.
define amdgpu_ps void @test_cvt_sr_bf16_f32_fneg(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_bf16_f32_fneg:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_bf16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x bfloat>, ptr addrspace(1) %out, align 4
  %src0.fneg = fneg float %src0
  %cvt = tail call <2 x bfloat> @llvm.amdgcn.cvt.sr.bf16.f32(<2 x bfloat> %old, float %src0.fneg, i32 %src1, i1 false)
  store <2 x bfloat> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; f16, i1 operand false: the expected instruction carries no op_sel modifier.
define amdgpu_ps void @test_cvt_sr_f16_f32_word_sel_0(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_f16_f32_word_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_f16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x half>, ptr addrspace(1) %out, align 4
  %cvt = tail call <2 x half> @llvm.amdgcn.cvt.sr.f16.f32(<2 x half> %old, float %src0, i32 %src1, i1 false)
  store <2 x half> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; f16, i1 operand true: the expected instruction carries op_sel:[0,0,1].
define amdgpu_ps void @test_cvt_sr_f16_f32_word_sel_1(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_f16_f32_word_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_f16_f32 v4, v2, v3 op_sel:[0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x half>, ptr addrspace(1) %out, align 4
  %cvt = tail call <2 x half> @llvm.amdgcn.cvt.sr.f16.f32(<2 x half> %old, float %src0, i32 %src1, i1 true)
  store <2 x half> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; f16 with fabs on src0: the expected output clears the sign bit with a
; separate v_and_b32 before the conversion.
define amdgpu_ps void @test_cvt_sr_f16_f32_fabs(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_f16_f32_fabs:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v2
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_f16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x half>, ptr addrspace(1) %out, align 4
  %src0.fabs = call float @llvm.fabs.f32(float %src0)
  %cvt = tail call <2 x half> @llvm.amdgcn.cvt.sr.f16.f32(<2 x half> %old, float %src0.fabs, i32 %src1, i1 false)
  store <2 x half> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; f16 with fneg on src0: the expected output flips the sign bit with a
; separate v_xor_b32 before the conversion.
define amdgpu_ps void @test_cvt_sr_f16_f32_fneg(ptr addrspace(1) %out, float %src0, i32 %src1) {
; GFX950-LABEL: test_cvt_sr_f16_f32_fneg:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v4, v[0:1], off
; GFX950-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_sr_f16_f32 v4, v2, v3
; GFX950-NEXT:    global_store_dword v[0:1], v4, off
; GFX950-NEXT:    s_endpgm
  %old = load <2 x half>, ptr addrspace(1) %out, align 4
  %src0.fneg = fneg float %src0
  %cvt = tail call <2 x half> @llvm.amdgcn.cvt.sr.f16.f32(<2 x half> %old, float %src0.fneg, i32 %src1, i1 false)
  store <2 x half> %cvt, ptr addrspace(1) %out, align 8
  ret void
}