xref: /llvm-project/llvm/test/CodeGen/AMDGPU/sdwa-commute.ll (revision 173c68239d1d11f4e36c8af07a28310da67568a7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck %s
3
; Aggregate of two half fields; @extracted_values stores one half result into
; each field through its pointer parameter, which is also named %ret_struct.
4%ret_struct = type { half, half }
5
; Loads four <2 x half> values through the addrspace(3) pointers %arg0..%arg3,
; extracts element 1 of each, and computes two sums of differences: one with
; the operands subtracted as (a - b) and one with them subtracted as (b - a).
; The two half results are stored to fields 0 and 1 of the struct that
; %ret_struct points to.
; The expected gfx1030 output below performs all four subtractions with
; v_sub_f16_sdwa selecting WORD_1 on both sources, once per operand order.
6define void @extracted_values(ptr %ret_struct, ptr addrspace(3) %arg0, ptr addrspace(3) %arg1, ptr addrspace(3) %arg2, ptr addrspace(3) %arg3) {
7; CHECK-LABEL: extracted_values:
8; CHECK:       ; %bb.0: ; %entry
9; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; CHECK-NEXT:    ds_read_b32 v3, v3
11; CHECK-NEXT:    ds_read_b32 v4, v4
12; CHECK-NEXT:    ds_read_b32 v2, v2
13; CHECK-NEXT:    ds_read_b32 v5, v5
14; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
15; CHECK-NEXT:    v_sub_f16_sdwa v6, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
16; CHECK-NEXT:    v_sub_f16_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
17; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
18; CHECK-NEXT:    v_sub_f16_sdwa v7, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
19; CHECK-NEXT:    v_sub_f16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
20; CHECK-NEXT:    v_add_f16_e32 v4, v6, v7
21; CHECK-NEXT:    v_add_f16_e32 v2, v3, v2
22; CHECK-NEXT:    v_pack_b32_f16 v2, v4, v2
23; CHECK-NEXT:    flat_store_dword v[0:1], v2
24; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
25; CHECK-NEXT:    s_setpc_b64 s[30:31]
26entry:
  ; %tmp4 = (element 1 of *%arg1) - (element 1 of *%arg2)
27  %tmp0 = load <2 x half>, ptr addrspace(3) %arg1, align 4
28  %tmp1 = extractelement <2 x half> %tmp0, i64 1
29  %tmp2 = load <2 x half>, ptr addrspace(3) %arg2, align 4
30  %tmp3 = extractelement <2 x half> %tmp2, i64 1
31  %tmp4 = fsub contract half %tmp1, %tmp3
  ; %tmp9 = (element 1 of *%arg0) - (element 1 of *%arg3)
32  %tmp5 = load <2 x half>, ptr addrspace(3) %arg0, align 4
33  %tmp6 = extractelement <2 x half> %tmp5, i64 1
34  %tmp7 = load <2 x half>, ptr addrspace(3) %arg3, align 4
35  %tmp8 = extractelement <2 x half> %tmp7, i64 1
36  %tmp9 = fsub contract half %tmp6, %tmp8
  ; First result: sum of the two differences above.
37  %tmp10 = fadd contract half %tmp4, %tmp9
  ; Second result: the same operand pairs subtracted in the opposite order,
  ; then summed. Both orders of each pair are therefore live at once.
38  %tmp11 = fsub contract half %tmp3, %tmp1
39  %tmp12 = fsub contract half %tmp8, %tmp6
40  %tmp13 = fadd contract half %tmp11, %tmp12
  ; Write the first result to field 0 and the second to field 1.
41  %field_ptr = getelementptr %ret_struct, ptr %ret_struct, i32 0, i32 0
42  store half %tmp10, ptr %field_ptr, align 2
43  %field_ptr1 = getelementptr %ret_struct, ptr %ret_struct, i32 0, i32 1
44  store half %tmp13, ptr %field_ptr1, align 2
45  ret void
46}
47