; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s

; Codegen test for 64-bit integer add and sub with -global-isel. llc is run
; twice, once with -mcpu=gfx1100 and once with -mcpu=gfx1200. Each operation
; is tested in an amdgpu_kernel function (s_* names) and in an amdgpu_ps
; function (v_* names).
;
; The assertion comments inside each function were produced by
; utils/update_llc_test_checks.py (see the note on the first line), so
; regenerate them with that script instead of editing them by hand.

; i64 add in an amdgpu_kernel: %a + %b is stored to %out.
; On both targets the expected code fetches the operands with s_load_b128 and
; s_load_b64, copies the sum into v0/v1 and writes it with global_store_b64.
; Only the add itself differs: the gfx1100 checks expect an s_add_u32 followed
; by s_addc_u32, while the gfx1200 checks expect a single s_add_nc_u64.
define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-LABEL: s_add_u64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_u32 s2, s2, s4
; GFX11-NEXT: s_addc_u32 s3, s3, s5
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: s_add_u64:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
entry:
  %add = add i64 %a, %b
  store i64 %add, ptr addrspace(1) %out
  ret void
}

; i64 add in an amdgpu_ps function: %a + %b is stored to %out.
; Both targets share one set of checks. v_add_co_u32 writes v2 and vcc_lo,
; v_add_co_ci_u32 then reads vcc_lo while computing v3, and the v[2:3] pair is
; stored through the address held in v[0:1].
define amdgpu_ps void @v_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GCN-LABEL: v_add_u64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GCN-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off
; GCN-NEXT: s_endpgm
entry:
  %add = add i64 %a, %b
  store i64 %add, ptr addrspace(1) %out
  ret void
}

; i64 sub in an amdgpu_kernel: %a - %b is stored to %out.
; The load, copy and store sequence matches the one checked for @s_add_u64.
; The subtract differs per target: the gfx1100 checks expect an s_sub_u32
; followed by s_subb_u32, while the gfx1200 checks expect a single
; s_sub_nc_u64.
define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-LABEL: s_sub_u64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sub_u32 s2, s2, s4
; GFX11-NEXT: s_subb_u32 s3, s3, s5
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: s_sub_u64:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_sub_nc_u64 s[2:3], s[2:3], s[4:5]
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
entry:
  %sub = sub i64 %a, %b
  store i64 %sub, ptr addrspace(1) %out
  ret void
}

; i64 sub in an amdgpu_ps function: %a - %b is stored to %out.
; Both targets share one set of checks. v_sub_co_u32 writes v2 and vcc_lo,
; v_sub_co_ci_u32 then reads vcc_lo while computing v3, and the v[2:3] pair is
; stored through the address held in v[0:1].
define amdgpu_ps void @v_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GCN-LABEL: v_sub_u64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_sub_co_u32 v2, vcc_lo, v2, v4
; GCN-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off
; GCN-NEXT: s_endpgm
entry:
  %sub = sub i64 %a, %b
  store i64 %sub, ptr addrspace(1) %out
  ret void
}
