; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Exercises GlobalISel selection of the llvm.amdgcn.trig.preop.f64 intrinsic
; on five subtargets (hawaii, fiji, gfx900, gfx1010, gfx1100). Four cases are
; covered: {callable function, amdgpu_kernel} x {variable, constant} second
; operand. The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Callable function, both operands taken from the function arguments. Every
; subtarget is expected to emit a single v_trig_preop_f64 on the incoming
; vector registers.
define double @v_trig_preop_f64(double %a, i32 %b) {
; GCN-LABEL: v_trig_preop_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_trig_preop_f64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], v2
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
  ret double %result
}

; Callable function with the constant 7 as the second operand (%b is unused).
; The constant is expected to appear directly as an immediate operand of the
; instruction.
define double @v_trig_preop_f64_imm(double %a, i32 %b) {
; GCN-LABEL: v_trig_preop_f64_imm:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], 7
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], 7
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
  ret double %result
}

; Kernel entry point: both operands are kernel arguments and are loaded into
; scalar registers. On hawaii/fiji/gfx900 the expected code first copies the
; i32 operand into a vector register, while gfx1010/gfx1100 pass s2 straight
; to the instruction (presumably a constant-bus operand limit on the older
; subtargets; confirm against the ISA documentation). The volatile store to
; an undef pointer only keeps the result live; hawaii and fiji are expected
; to split it into two dword stores.
define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v0, s2
; CI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
; CI-NEXT:    s_add_u32 s0, s0, 4
; CI-NEXT:    s_addc_u32 s1, s1, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dword v[0:1], v0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    flat_store_dword v[2:3], v1
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_trig_preop_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
; VI-NEXT:    s_add_u32 s0, s0, 4
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dword v[0:1], v0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_store_dword v[2:3], v1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: s_trig_preop_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: s_trig_preop_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX10-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: s_trig_preop_f64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX11-NEXT:    flat_store_b64 v[0:1], v[0:1] dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_endpgm
  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
  store volatile double %result, ptr undef
  ret void
}

; Kernel entry point with the constant 7 as the second operand (%b is
; unused). Only the double argument is expected to be loaded, and the
; constant is expected to appear as an immediate operand on every subtarget.
define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64_imm:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
; CI-NEXT:    s_add_u32 s0, s0, 4
; CI-NEXT:    s_addc_u32 s1, s1, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dword v[0:1], v0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    flat_store_dword v[2:3], v1
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_trig_preop_f64_imm:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
; VI-NEXT:    s_add_u32 s0, s0, 4
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dword v[0:1], v0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_store_dword v[2:3], v1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: s_trig_preop_f64_imm:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: s_trig_preop_f64_imm:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX10-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: s_trig_preop_f64_imm:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX11-NEXT:    flat_store_b64 v[0:1], v[0:1] dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_endpgm
  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
  store volatile double %result, ptr undef
  ret void
}

; Intrinsic under test, taking a double and an i32 and returning a double.
declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0

attributes #0 = { nounwind readnone speculatable }