; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX950
; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX950-ISEL

; Code generation test for the llvm.amdgcn.fdot2.f32.bf16 intrinsic.
;
; llc is invoked four times: for gfx1100 and for gfx950, each once with the
; default instruction selector and once with -global-isel (together with
; -global-isel-abort=2). Both gfx1100 invocations feed the same check prefix,
; so the expected assembly recorded for that prefix has to match the output of
; each of them. The two gfx950 invocations use separate prefixes.
;
; The expected-assembly comments inside the functions are generated (see the
; first line of this file); regenerate them with that script instead of
; editing them by hand.

; Intrinsic under test: takes two <2 x bfloat> operands, a float operand and
; an i1 clamp flag, and returns a float.
declare float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %c, i1 %clamp)

; Clamp flag set to 1.
; The kernel loads %a.val, %b.val and %c.val through the addrspace(1) pointer
; arguments, calls the intrinsic, and stores the result through %r. The
; recorded output for every prefix uses v_dot2_f32_bf16 with the clamp
; modifier.
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp(
; GFX11-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s6, s[6:7], 0x0
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0 clamp
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX950-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp:
; GFX950: ; %bb.0: ; %entry
; GFX950-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX950-NEXT: v_mov_b32_e32 v0, 0
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-NEXT: s_load_dword s0, s[12:13], 0x0
; GFX950-NEXT: s_load_dword s1, s[14:15], 0x0
; GFX950-NEXT: s_load_dword s2, s[10:11], 0x0
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v1, s0
; GFX950-NEXT: v_mov_b32_e32 v2, s1
; GFX950-NEXT: v_dot2_f32_bf16 v1, s2, v1, v2 clamp
; GFX950-NEXT: s_nop 2
; GFX950-NEXT: global_store_dword v0, v1, s[8:9]
; GFX950-NEXT: s_endpgm
;
; GFX950-ISEL-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp:
; GFX950-ISEL: ; %bb.0: ; %entry
; GFX950-ISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-ISEL-NEXT: s_load_dword s0, s[12:13], 0x0
; GFX950-ISEL-NEXT: s_load_dword s1, s[14:15], 0x0
; GFX950-ISEL-NEXT: s_load_dword s2, s[10:11], 0x0
; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, s0
; GFX950-ISEL-NEXT: v_mov_b32_e32 v2, s1
; GFX950-ISEL-NEXT: v_dot2_f32_bf16 v1, s2, v1, v2 clamp
; GFX950-ISEL-NEXT: s_nop 2
; GFX950-ISEL-NEXT: global_store_dword v0, v1, s[8:9]
; GFX950-ISEL-NEXT: s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
entry:
  %a.val = load <2 x bfloat>, ptr addrspace(1) %a
  %b.val = load <2 x bfloat>, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a.val, <2 x bfloat> %b.val, float %c.val, i1 1)
  store float %r.val, ptr addrspace(1) %r
  ret void
}


; Clamp flag set to 0.
; Same load / call / store sequence as the kernel above, with only the last
; intrinsic argument changed. The recorded gfx1100 output uses v_dot2_f32_bf16
; without the clamp modifier; both recorded gfx950 outputs use
; v_dot2c_f32_bf16_e32 instead, with the result left in the register that was
; initialised from the loaded %c value.
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_no_clamp(
; GFX11-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_no_clamp:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s6, s[6:7], 0x0
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_dot2_f32_bf16 v0, s2, s3, v0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX950-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_no_clamp:
; GFX950: ; %bb.0: ; %entry
; GFX950-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX950-NEXT: v_mov_b32_e32 v0, 0
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-NEXT: s_load_dword s0, s[12:13], 0x0
; GFX950-NEXT: s_load_dword s1, s[14:15], 0x0
; GFX950-NEXT: s_load_dword s2, s[10:11], 0x0
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v1, s0
; GFX950-NEXT: v_mov_b32_e32 v2, s1
; GFX950-NEXT: v_dot2c_f32_bf16_e32 v2, s2, v1
; GFX950-NEXT: s_nop 2
; GFX950-NEXT: global_store_dword v0, v2, s[8:9]
; GFX950-NEXT: s_endpgm
;
; GFX950-ISEL-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_no_clamp:
; GFX950-ISEL: ; %bb.0: ; %entry
; GFX950-ISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX950-ISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-ISEL-NEXT: s_load_dword s0, s[12:13], 0x0
; GFX950-ISEL-NEXT: s_load_dword s1, s[14:15], 0x0
; GFX950-ISEL-NEXT: s_load_dword s2, s[10:11], 0x0
; GFX950-ISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-ISEL-NEXT: v_mov_b32_e32 v1, s0
; GFX950-ISEL-NEXT: v_mov_b32_e32 v2, s1
; GFX950-ISEL-NEXT: v_dot2c_f32_bf16_e32 v2, s2, v1
; GFX950-ISEL-NEXT: s_nop 2
; GFX950-ISEL-NEXT: global_store_dword v0, v2, s[8:9]
; GFX950-ISEL-NEXT: s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
entry:
  %a.val = load <2 x bfloat>, ptr addrspace(1) %a
  %b.val = load <2 x bfloat>, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a.val, <2 x bfloat> %b.val, float %c.val, i1 0)
  store float %r.val, ptr addrspace(1) %r
  ret void
}