; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Code generation test for llvm.experimental.constrained.fadd on double
; operands. It covers scalar f64, <2 x double> and <3 x double> arguments with
; the fpexcept.strict, fpexcept.ignore and fpexcept.maytrap exception
; behaviours (all with round.tonearest), plus one amdgpu_ps function taking
; inreg arguments. Every function carries the strictfp attribute (#0).
;
; The gfx1010 and gfx1100 invocations share one check prefix, so the expected
; output below must hold for both of them.

; Scalar f64, fpexcept.strict: expected to select a single v_add_f64.
define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %val
}

; Scalar f64, fpexcept.ignore: same expected output as the strict variant.
define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret double %val
}

; Scalar f64, fpexcept.maytrap: same expected output as the strict variant.
define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret double %val
}

; <2 x double>, fpexcept.strict: expected to become one v_add_f64 per element.
define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x double> %val
}

; <2 x double>, fpexcept.ignore: same expected output as the strict variant.
define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x double> %val
}

; <2 x double>, fpexcept.maytrap: same expected output as the strict variant.
define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x double> %val
}

; <3 x double>, fpexcept.strict: expected to become three v_add_f64
; instructions, one per element.
define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
; GCN-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], v[6:7]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], v[8:9]
; GCN-NEXT:    v_add_f64 v[4:5], v[4:5], v[10:11]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[6:7]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x double> %val
}

; amdgpu_ps entry with both operands passed inreg. The gfx900 expectations
; first copy s[4:5] into v[0:1] and add s[2:3] to that, while the other two
; targets are expected to add the two SGPR pairs directly. The result is
; bitcast to <2 x float> for the return.
define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
; GCN-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_add_f64 v[0:1], s[2:3], v[0:1]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX10-NEXT:    ; return to shader part epilog
  %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  %cast = bitcast double %val to <2 x float>
  ret <2 x float> %cast
}

; Intrinsic declarations used by the functions above.
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #1
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1

; #0 marks every test function as strictfp; #1 is applied to the intrinsic
; declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }