; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Code generation checks for the constrained floating-point multiply intrinsic
; on f32 and small f32 vectors. Every function is compiled for gfx900, gfx1010
; and gfx1100, once with -global-isel=0 and once with -global-isel=1; both
; settings share the same check prefixes, so they must produce identical code.
; All calls use the "round.tonearest" rounding mode and differ only in the
; exception-behavior metadata and in the operand shape.

; Scalar f32 multiply with "fpexcept.strict": a single v_mul_f32_e32 is
; expected on all three subtargets.
define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Scalar f32 multiply with "fpexcept.ignore": the expected code is the same
; single v_mul_f32_e32 as in the strict variant.
define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

; Scalar f32 multiply with "fpexcept.maytrap": again a single v_mul_f32_e32.
define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float> multiply with "fpexcept.strict". gfx900 and gfx1010 expect one
; v_mul_f32_e32 per element; gfx1100 expects both elements in one
; v_dual_mul_f32 :: v_dual_mul_f32 instruction, hence the separate prefixes.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <2 x float> multiply with "fpexcept.ignore"; expected code matches the
; strict <2 x float> variant on every subtarget.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

; <2 x float> multiply with "fpexcept.maytrap"; expected code matches the
; strict <2 x float> variant on every subtarget.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}
; <3 x float> multiply with "fpexcept.strict". gfx900 and gfx1010 expect three
; v_mul_f32_e32; gfx1100 expects the first two elements in one dual-issue
; v_dual_mul_f32 pair and the third element in a plain v_mul_f32_e32.
define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v3
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v4
; GCN-NEXT:    v_mul_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v4
; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v3 :: v_dual_mul_f32 v1, v1, v4
; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; amdgpu_ps entry point with both operands passed inreg, arriving in s2 and s3
; per the checks. gfx900 expects s3 to be copied into v0 before the multiply,
; while gfx1010 and gfx1100 expect one v_mul_f32_e64 reading both SGPRs.
define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_mul_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; llvm.fabs applied to the left operand of a strict multiply: the checks expect
; it to appear as the |v0| source modifier on v_mul_f32_e64 rather than as a
; separate instruction.
define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %val = call float @llvm.experimental.constrained.fmul.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; llvm.fabs applied to the right operand of a strict multiply: the checks
; expect the |v1| source modifier on the second source of v_mul_f32_e64.
define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y) #0
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fneg of llvm.fabs on the left operand of a strict multiply: the checks expect
; both operations combined into the -|v0| source modifier on v_mul_f32_e64.
define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, -|v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fmul.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Intrinsics used by the functions in this file.
declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)

; #0 is attached to every function definition and to the llvm.fabs call sites.
attributes #0 = { strictfp }