; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-FAKE16 %s

; Instruction-selection checks for 16-bit integer multiplies on AMDGPU,
; run for tahiti, fiji, gfx900 and gfx1100 (with and without real-true16).

; Multiply of two i16 function arguments. tahiti is expected to mask both
; operands to 16 bits and use the 24-bit multiply; the other subtargets are
; expected to use v_mul_lo_u16 directly.
; GCN-LABEL: {{^}}v_mul_i16:
; SI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; SI: v_mul_u32_u24

; GFX89: v_mul_lo_u16_e32 v0, v0, v1

; GFX11-TRUE16: v_mul_lo_u16 v0.l, v0.l, v0.h
; GFX11-FAKE16: v_mul_lo_u16 v0, v0, v1
define i16 @v_mul_i16(i16 %a, i16 %b) {
  %r.val = mul i16 %a, %b
  ret i16 %r.val
}

; Multiply of two i16 kernel arguments. The result is written with a volatile
; store to a null global pointer (presumably just to keep the multiply live).
;
; The pattern previously used here was the kernel's own name, which is not an
; instruction mnemonic and can be satisfied by any later mention of the symbol
; in the assembly output. Check for the 32-bit scalar multiply instead.
; NOTE(review): s_mul_i32 is the expected selection for uniform i16 operands
; (the uniform-load kernel below expects its VALU counterpart, v_mul_lo_u32);
; confirm against llc output for all five run configurations.
; GCN-LABEL: {{^}}s_mul_i16:
; GCN: s_mul_i32
define amdgpu_kernel void @s_mul_i16(i16 %a, i16 %b) {
  %r.val = mul i16 %a, %b
  store volatile i16 %r.val, ptr addrspace(1) null
  ret void
}

; FIXME: Should emit u16 mul here.
; Kernel that loads both i16 operands from global memory through pointers
; passed as kernel arguments, multiplies them and stores the result. Every
; run configuration is currently expected to select a 32-bit VALU multiply.
; GCN-LABEL: {{^}}v_mul_i16_uniform_load:
; GCN: v_mul_lo_u32
define amdgpu_kernel void @v_mul_i16_uniform_load(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
entry:
  %a.val = load i16, ptr addrspace(1) %a
  %b.val = load i16, ptr addrspace(1) %b
  %r.val = mul i16 %a.val, %b.val
  store i16 %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x i16> multiply of function arguments. tahiti is expected to use two
; 24-bit multiplies; fiji one SDWA multiply, one plain multiply and an OR;
; gfx900 a single packed multiply directly between the wait and the return.
; GCN-LABEL: {{^}}v_mul_v2i16:
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24

; VI: v_mul_lo_u16_sdwa
; VI: v_mul_lo_u16_e32
; VI: v_or_b32_e32


; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
  %r.val = mul <2 x i16> %a, %b
  ret <2 x i16> %r.val
}

; <3 x i16> multiply of function arguments: three multiplies are expected on
; tahiti and fiji, and exactly two packed multiplies on gfx900.
; FIXME: Unpack garbage on gfx9
; GCN-LABEL: {{^}}v_mul_v3i16:
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24

; VI: v_mul_lo_u16
; VI: v_mul_lo_u16
; VI: v_mul_lo_u16

; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_mul_lo_u16
; GFX9-NEXT: v_pk_mul_lo_u16
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v_mul_v3i16(<3 x i16> %a, <3 x i16> %b) {
  %r.val = mul <3 x i16> %a, %b
  ret <3 x i16> %r.val
}

; <4 x i16> multiply of function arguments: four multiplies are expected on
; tahiti, two SDWA plus two plain multiplies and two ORs on fiji, and one
; packed multiply per 32-bit register pair (v0/v2, v1/v3) on gfx900.
; GCN-LABEL: {{^}}v_mul_v4i16:
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24
; SI: v_mul_u32_u24

; VI: v_mul_lo_u16_sdwa
; VI: v_mul_lo_u16_sdwa
; VI: v_mul_lo_u16_e32
; VI: v_mul_lo_u16_e32
; VI: v_or_b32_e32
; VI: v_or_b32_e32

; GFX9: s_waitcnt
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v_mul_v4i16(<4 x i16> %a, <4 x i16> %b) {
  %r.val = mul <4 x i16> %a, %b
  ret <4 x i16> %r.val
}