; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s

; GlobalISel selection tests for the llvm.amdgcn.sdot8 intrinsic. Every
; function below is compiled for gfx906, gfx1011 and gfx1012; the two gfx10
; targets share one set of assertions.

; Plain call with the clamp immediate set to false: the checks expect a single
; v_dot8_i32_i4 with no modifier.
define i32 @v_sdot8(i32 %a, i32 %b, i32 %c) {
; GFX906-LABEL: v_sdot8:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 false)
  ret i32 %dot
}

; Same call with the clamp immediate set to true: the checks expect the
; "clamp" modifier on the selected instruction.
define i32 @v_sdot8_clamp(i32 %a, i32 %b, i32 %c) {
; GFX906-LABEL: v_sdot8_clamp:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2 clamp
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot8_clamp:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 true)
  ret i32 %dot
}

; FIXME: Fix <8 x i4> argument handling and do not let these bitcasts expand.
; define i32 @v_sdot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) {
;   %a.cast = bitcast <8 x i4> %a to i32
;   %b.cast = bitcast <8 x i4> %b to i32
;   %r = call i32 @llvm.amdgcn.sdot8(i32 %a.cast, i32 %b.cast, i32 %c, i1 false)
;   ret i32 %r
; }

; The first operand is an fneg'd float bitcast to i32. The checks expect the
; negation as a separate v_xor_b32 with 0x80000000 ahead of the dot
; instruction, whose operands carry no modifier.
define i32 @v_sdot8_fnegf32_a(float %a, i32 %b, i32 %c) {
; GFX906-LABEL: v_sdot8_fnegf32_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot8_fnegf32_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg float %a
  %a.neg.bits = bitcast float %a.neg to i32
  %dot = call i32 @llvm.amdgcn.sdot8(i32 %a.neg.bits, i32 %b, i32 %c, i1 false)
  ret i32 %dot
}

; The first operand is an fneg'd <2 x half> bitcast to i32. The checks expect
; the negation as a separate v_xor_b32 with 0x80008000 ahead of the dot
; instruction, whose operands carry no modifier.
define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
; GFX906-LABEL: v_sdot8_fnegv2f16_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot8_fnegv2f16_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <2 x half> %a
  %a.neg.bits = bitcast <2 x half> %a.neg to i32
  %dot = call i32 @llvm.amdgcn.sdot8(i32 %a.neg.bits, i32 %b, i32 %c, i1 false)
  ret i32 %dot
}

; Intrinsic under test; its fourth operand is declared immarg.
declare i32 @llvm.amdgcn.sdot8(i32, i32, i32, i1 immarg) #0

attributes #0 = { nounwind readnone speculatable }