; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Exercises llvm.experimental.constrained.fadd on f32, <2 x float> and
; <3 x float> inside strictfp functions, always with round.tonearest and
; with the exception-behavior operand varied per function.

; Scalar f32 add, exception behavior fpexcept.strict.
define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %sum
}

; Scalar f32 add, exception behavior fpexcept.ignore.
define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %sum
}

; Scalar f32 add, exception behavior fpexcept.maytrap.
define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %sum
}

; <2 x float> add, fpexcept.strict. The gfx1100 run expects a single dual-issue
; add, while the other two runs expect two separate adds.
define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_add_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %sum
}

; <2 x float> add, exception behavior fpexcept.ignore.
define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_add_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %sum
}

; <2 x float> add, exception behavior fpexcept.maytrap.
define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_add_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %sum
}

; <3 x float> add, fpexcept.strict. The gfx1100 run expects one dual-issue add
; for the first two lanes plus a plain add for the third.
define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v3
; GCN-NEXT:    v_add_f32_e32 v1, v1, v4
; GCN-NEXT:    v_add_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f32_e32 v0, v0, v3
; GFX10-NEXT:    v_add_f32_e32 v1, v1, v4
; GFX10-NEXT:    v_add_f32_e32 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v3 :: v_dual_add_f32 v1, v1, v4
; GFX11-NEXT:    v_add_f32_e32 v2, v2, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sum = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %sum
}

; amdgpu_ps variant with inreg arguments. The expected code reads the operands
; from s2 and s3; the gfx900 run first copies s3 into a VGPR.
define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fadd_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_add_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fadd_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_add_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %sum
}

; fabs applied to the left operand. The expected code uses the |v0| source
; modifier on the add instead of a separate instruction.
define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_add_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %abs.lhs = call float @llvm.fabs.f32(float %x) #0
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %abs.lhs, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %sum
}

; fabs applied to the right operand. The expected code uses the |v1| source
; modifier on the add.
define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_add_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %abs.rhs = call float @llvm.fabs.f32(float %y) #0
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %abs.rhs, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %sum
}

; fneg(fabs(x)) as the left operand. The expected code is a subtract of |v0|
; from v1 rather than an add with a negated operand.
define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, v1, |v0|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, v1, |v0|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %abs.lhs = call float @llvm.fabs.f32(float %x) #0
  %neg.abs.lhs = fneg float %abs.lhs
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %neg.abs.lhs, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %sum
}

; Intrinsics used by the functions above.
declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)

; Attribute group #0 is attached to every function here and to the fabs calls.
attributes #0 = { strictfp }