; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s

; Make sure we don't violate the constant bus restriction.
;
; Every function below passes its 'inreg' arguments (which show up as s2, s3,
; s4 in the expected assembly) straight into a single vector operation. The
; assertions pin, per target, which of those SGPRs stay as direct operands and
; which are first copied into a VGPR with v_mov_b32.
;
; The assertion blocks are generated output: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Binary operation with 2 different SGPRs
define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: fmul_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mul_f32_e32 v0, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fmul_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = fmul float %src0, %src1
  ret float %result
}

; Binary operation where both operands are the same SGPR
define amdgpu_ps float @fmul_ss(float inreg %src) {
; GFX9-LABEL: fmul_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mul_f32_e64 v0, s2, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fmul_ss:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, s2, s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = fmul float %src, %src
  ret float %result
}

; Ternary operation with 3 different SGPRs
define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) {
; GFX9-LABEL: fma_s_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fma_s_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX10PLUS-NEXT:    v_fma_f32 v0, s3, s2, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = call float @llvm.fma.f32(float %src0, float %src1, float %src2)
  ret float %result
}

; Ternary operation with 3 identical SGPRs
define amdgpu_ps float @fma_sss(float inreg %src) {
; GFX9-LABEL: fma_sss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_fma_f32 v0, s2, s2, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fma_sss:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_fma_f32 v0, s2, s2, s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = call float @llvm.fma.f32(float %src, float %src, float %src)
  ret float %result
}

; src0/1 are same SGPR
define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) {
; GFX9-LABEL: fma_ss_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fma_ss_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_fma_f32 v0, s2, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = call float @llvm.fma.f32(float %src01, float %src01, float %src2)
  ret float %result
}

; src1/2 are same SGPR
define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) {
; GFX9-LABEL: fma_s_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fma_s_ss:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_fma_f32 v0, s2, s3, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = call float @llvm.fma.f32(float %src0, float %src12, float %src12)
  ret float %result
}

; src0/2 are same SGPR
define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) {
; GFX9-LABEL: fma_ss_s_same_outer:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fma_ss_s_same_outer:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_fma_f32 v0, s2, s3, s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = call float @llvm.fma.f32(float %src02, float %src1, float %src02)
  ret float %result
}

; Compare of 2 different SGPRs; the result feeds a select of 1.0 / 0.0
define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: fcmp_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, s2, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: fcmp_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_cmp_eq_f32_e64 s0, s2, s3
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %cmp = fcmp oeq float %src0, %src1
  %result = select i1 %cmp, float 1.0, float 0.0
  ret float %result
}

; Select between 2 different SGPRs, with the condition computed from VGPR
; inputs into vcc
define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
; GFX9-LABEL: select_vcc_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v3, s3
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: select_vcc_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, s3
; GFX10PLUS-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v2, s2, vcc_lo
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %cmp = fcmp oeq float %cmp0, %cmp1
  %result = select i1 %cmp, float %src0, float %src1
  ret float %result
}

; Same as select_vcc_s_s, but the value selected when the condition is true is
; negated, which appears as a source modifier (-v3 / -v2) on the select
define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
; GFX9-LABEL: select_vcc_fneg_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, s3
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -v3, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: select_vcc_fneg_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, s2
; GFX10PLUS-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, s3, -v2, vcc_lo
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %cmp = fcmp oeq float %cmp0, %cmp1
  %neg.src0 = fneg float %src0
  %result = select i1 %cmp, float %neg.src0, float %src1
  ret float %result
}

; Constant bus used by vcc
define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) {
; GFX9-LABEL: amdgcn_div_fmas_sss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    s_nop 2
; GFX9-NEXT:    v_div_fmas_f32 v0, v0, v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: amdgcn_div_fmas_sss:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
; GFX10PLUS-NEXT:    v_div_fmas_f32 v0, s2, s2, s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %vcc = fcmp oeq float %cmp.src, 0.0
  %result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc)
  ret float %result
}

; llvm.amdgcn.class with both the tested value and the i32 mask in SGPRs
define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: class_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: class_s_s:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_cmp_class_f32_e64 s0, s2, s3
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1)
  %result = select i1 %class, float 1.0, float 0.0
  ret float %result
}

; llvm.amdgcn.div.scale with 2 different SGPRs and the immarg set to true; the
; expected instruction then has %src0 (s2) as its first source operand. Only
; the float member of the result is used.
define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: div_scale_s_s_true:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_div_scale_f32 v0, s[0:1], s2, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: div_scale_s_s_true:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_div_scale_f32 v0, s0, s2, s3, s2
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: div_scale_s_s_true:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_div_scale_f32 v0, null, s2, s3, s2
; GFX11-NEXT:    ; return to shader part epilog
  %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
  %result = extractvalue { float, i1 } %div.scale, 0
  ret float %result
}

; Same as div_scale_s_s_true with the immarg set to false; the expected
; instruction then has %src1 (s3) as its first source operand.
define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: div_scale_s_s_false:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_div_scale_f32 v0, s[0:1], v0, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: div_scale_s_s_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_div_scale_f32 v0, s0, s3, s3, s2
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: div_scale_s_s_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_div_scale_f32 v0, null, s3, s3, s2
; GFX11-NEXT:    ; return to shader part epilog
  %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
  %result = extractvalue { float, i1 } %div.scale, 0
  ret float %result
}

declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
declare i1 @llvm.amdgcn.class.f32(float, i32) #1

attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }