; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX1150 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX1150 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX12 %s

; Overview
; --------
; Every function below takes its operands as `inreg` arguments and performs a
; single floating-point operation (f32 or f16). The expected assembly uses a
; scalar (s_*) float instruction on SGPRs and then copies the result into v0.
;
; The llc invocations above cover two subtargets (gfx1150 and gfx1200), each
; run once without and once with -global-isel. Both runs of a subtarget share
; one set of check prefixes, so both selectors are expected to emit the same
; code. Expectations that are identical on both subtargets use the common
; prefix; the per-subtarget prefixes are only used where the output differs.
;
; The assertion lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; f32 add: expects s_add_f32.
define amdgpu_vs float @fadd_f32(float inreg %a, float inreg %b) {
; CHECK-LABEL: fadd_f32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_f32 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %add = fadd float %a, %b
  ret float %add
}

; f32 subtract: expects s_sub_f32.
define amdgpu_vs float @fsub_f32(float inreg %a, float inreg %b) {
; CHECK-LABEL: fsub_f32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_sub_f32 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %sub = fsub float %a, %b
  ret float %sub
}

; f32 multiply: expects s_mul_f32.
define amdgpu_vs float @fmul_f32(float inreg %a, float inreg %b) {
; CHECK-LABEL: fmul_f32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mul_f32 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %mul = fmul float %a, %b
  ret float %mul
}

; f32 llvm.minnum: the expected mnemonic differs per subtarget
; (s_min_f32 for gfx1150, s_min_num_f32 for gfx1200).
define amdgpu_vs float @fmin_f32(float inreg %a, float inreg %b) {
; GFX1150-LABEL: fmin_f32:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_min_f32 s0, s0, s1
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fmin_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_min_num_f32 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %min = call float @llvm.minnum.f32(float %a, float %b)
  ret float %min
}

; f32 llvm.maxnum: the expected mnemonic differs per subtarget
; (s_max_f32 for gfx1150, s_max_num_f32 for gfx1200).
define amdgpu_vs float @fmax_f32(float inreg %a, float inreg %b) {
; GFX1150-LABEL: fmax_f32:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_max_f32 s0, s0, s1
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fmax_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_max_num_f32 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %max
}

; f16 add: expects s_add_f16.
define amdgpu_vs half @fadd_f16(half inreg %a, half inreg %b) {
; CHECK-LABEL: fadd_f16:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_f16 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %add = fadd half %a, %b
  ret half %add
}

; f16 subtract: expects s_sub_f16.
define amdgpu_vs half @fsub_f16(half inreg %a, half inreg %b) {
; CHECK-LABEL: fsub_f16:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_sub_f16 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %sub = fsub half %a, %b
  ret half %sub
}

; f16 multiply: expects s_mul_f16.
define amdgpu_vs half @fmul_f16(half inreg %a, half inreg %b) {
; CHECK-LABEL: fmul_f16:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mul_f16 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %mul = fmul half %a, %b
  ret half %mul
}

; f16 llvm.minnum: the expected mnemonic differs per subtarget
; (s_min_f16 for gfx1150, s_min_num_f16 for gfx1200).
define amdgpu_vs half @fmin_f16(half inreg %a, half inreg %b) {
; GFX1150-LABEL: fmin_f16:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_min_f16 s0, s0, s1
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fmin_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_min_num_f16 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %min = call half @llvm.minnum.f16(half %a, half %b)
  ret half %min
}

; f16 llvm.maxnum: the expected mnemonic differs per subtarget
; (s_max_f16 for gfx1150, s_max_num_f16 for gfx1200).
define amdgpu_vs half @fmax_f16(half inreg %a, half inreg %b) {
; GFX1150-LABEL: fmax_f16:
; GFX1150: ; %bb.0:
; GFX1150-NEXT: s_max_f16 s0, s0, s1
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fmax_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_max_num_f16 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  ret half %max
}

; llvm.amdgcn.cvt.pkrtz on two f32 inputs producing <2 x half>:
; expects s_cvt_pk_rtz_f16_f32.
define amdgpu_vs <2 x half> @s_cvt_pkrtz_v2f16_f32(float inreg %x, float inreg %y) {
; CHECK-LABEL: s_cvt_pkrtz_v2f16_f32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cvt_pk_rtz_f16_f32 s0, s0, s1
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
  ret <2 x half> %result
}

; f32 llvm.fma with %a as the addend (third fma operand): expects a single
; s_fmac_f32 whose destination s0 is also the accumulator, with no extra copy.
define amdgpu_vs float @fmac_f32(float inreg %a, float inreg %b, float inreg %c) {
; CHECK-LABEL: fmac_f32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_fmac_f32 s0, s1, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %res = call float @llvm.fma.f32(float %b, float %c, float %a)
  ret float %res
}

; Check selection of mov + fmac if src2 of fmac has a use later on.
; Here %c is both the fma addend and an operand of the following fadd, so the
; expected output first copies s2 into s3, accumulates into s3, and then adds
; the still-intact s2.
define amdgpu_vs float @fmac_f32_with_mov(float inreg %a, float inreg %b, float inreg %c) {
; CHECK-LABEL: fmac_f32_with_mov:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s3, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; CHECK-NEXT: s_fmac_f32 s3, s0, s1
; CHECK-NEXT: s_add_f32 s0, s3, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %res = fadd float %fma, %c
  ret float %res
}

; f16 llvm.fma with %a as the addend (third fma operand): expects a single
; s_fmac_f16 whose destination s0 is also the accumulator, with no extra copy.
define amdgpu_vs half @fmac_f16(half inreg %a, half inreg %b, half inreg %c) {
; CHECK-LABEL: fmac_f16:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_fmac_f16 s0, s1, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %res = call half @llvm.fma.f16(half %b, half %c, half %a)
  ret half %res
}

; Check selection of mov + fmac if src2 of fmac has a use later on.
; f16 variant of fmac_f32_with_mov: %c is reused by the fadd, so the expected
; output copies s2 into s3 before the s_fmac_f16.
define amdgpu_vs half @fmac_f16_with_mov(half inreg %a, half inreg %b, half inreg %c) {
; CHECK-LABEL: fmac_f16_with_mov:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s3, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; CHECK-NEXT: s_fmac_f16 s3, s0, s1
; CHECK-NEXT: s_add_f16 s0, s3, s2
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: ; return to shader part epilog
  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
  %res = fadd half %fma, %c
  ret half %res
}

; Regression test for crash in SIFoldOperands.
; Two s.buffer.load.i32 calls (offsets 0 and 4) on a zeroinitializer resource
; feed a contractable fmul by 4.0 followed by an fadd. The expected output has
; a single s_buffer_load_b64 and an s_fmamk_f32 carrying the literal
; 0x40800000 (the f32 bit pattern of 4.0). The two subtargets differ only in
; the wait instruction after the load (s_waitcnt lgkmcnt(0) for gfx1150,
; s_wait_kmcnt 0x0 for gfx1200).
define amdgpu_ps float @_amdgpu_ps_main() {
; GFX1150-LABEL: _amdgpu_ps_main:
; GFX1150: ; %bb.0: ; %bb
; GFX1150-NEXT: s_mov_b32 s0, 0
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_mov_b32 s1, s0
; GFX1150-NEXT: s_mov_b32 s2, s0
; GFX1150-NEXT: s_mov_b32 s3, s0
; GFX1150-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
; GFX1150-NEXT: s_waitcnt lgkmcnt(0)
; GFX1150-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: _amdgpu_ps_main:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_mov_b32 s1, s0
; GFX12-NEXT: s_mov_b32 s2, s0
; GFX12-NEXT: s_mov_b32 s3, s0
; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
bb:
  %i = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0)
  %i1 = bitcast i32 %i to float
  %i2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 4, i32 0)
  %i3 = bitcast i32 %i2 to float
  %i4 = fmul contract float %i3, 4.0
  %i5 = fadd contract float %i4, %i1
  ret float %i5
}

; Declarations of the intrinsics called by the tests above.
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare half @llvm.fma.f16(half, half, half) nounwind readnone