; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Codegen test for llvm.experimental.constrained.fsub on f32, <2 x f32> and
; <3 x f32> with the strictfp attribute. Every target below (gfx900, gfx1010,
; gfx1100) is compiled twice: once with SelectionDAG (-global-isel=0) and once
; with GlobalISel (-global-isel=1). Both selectors share the same check
; prefixes, so they are expected to produce identical output.
;
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s

; The selector must be given explicitly as 0 here: an empty value for this
; boolean option is parsed as "true", which would silently run GlobalISel twice
; and leave SelectionDAG untested on gfx1100.
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Scalar f32 subtract, exception behavior "fpexcept.strict".
define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Scalar f32 subtract, exception behavior "fpexcept.ignore".
define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

; Scalar f32 subtract, exception behavior "fpexcept.maytrap".
define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float> subtract, "fpexcept.strict". The gfx1100 checks expect the two
; lanes to be emitted as a single dual-issue instruction.
define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <2 x float> subtract, "fpexcept.ignore".
define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

; <2 x float> subtract, "fpexcept.maytrap".
define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}

; <3 x float> subtract, "fpexcept.strict". On gfx1100 the checks expect one
; dual-issue instruction for lanes 0/1 plus a plain subtract for lane 2.
define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v3
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v4
; GCN-NEXT:    v_sub_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v3
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v4
; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
; GFX11-NEXT:    v_sub_f32_e32 v2, v2, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; Scalar f32 subtract with both operands passed inreg to an amdgpu_ps shader.
; The gfx900 checks expect one operand to be copied to a VGPR first, while the
; gfx1010/gfx1100 checks expect both SGPRs to be used directly.
define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_sub_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fabs on the left operand: the checks expect it to fold into the subtract as
; a |v0| source modifier.
define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fabs on the right operand: the checks expect it to fold into the subtract as
; a |v1| source modifier.
define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y) #0
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fneg(fabs) on the left operand: the checks expect both to fold into the
; subtract as a -|v0| source modifier.
define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)

attributes #0 = { strictfp }