; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Codegen test for llvm.experimental.constrained.fsub on double, <2 x double>
; and <3 x double> operands. Every call uses round.tonearest; the exception
; behavior argument varies per function. The gfx900 runs (both instruction
; selectors) share the GCN prefix, and the gfx1010 and gfx1100 runs (both
; instruction selectors) share the GFX10 prefix. In every function the
; expected code is one v_add_f64 per element with the second source negated.

; Scalar double, fpexcept.strict.
define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %val
}

; Scalar double, fpexcept.ignore.
define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret double %val
}

; Scalar double, fpexcept.maytrap.
define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret double %val
}

; <2 x double>, fpexcept.strict: one v_add_f64 is expected per element.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x double> %val
}

; <2 x double>, fpexcept.ignore.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x double> %val
}

; <2 x double>, fpexcept.maytrap.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x double> %val
}

; <3 x double>, fpexcept.strict: three v_add_f64 instructions are expected.
define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
; GCN-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x double> %val
}

; amdgpu_ps entry point with both operands marked inreg. The gfx900 checks
; copy s4/s5 into v0/v1 before the add, while the gfx1010/gfx1100 checks use
; both SGPR pairs directly as sources. The result is bitcast to <2 x float>
; for the return.
define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fsub_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_f64 v[0:1], s[2:3], -s[4:5]
; GFX10-NEXT:    ; return to shader part epilog
  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  %cast = bitcast double %val to <2 x float>
  ret <2 x float> %cast
}

; Declarations of the constrained fsub overloads called above.
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) #1
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1

; Group #0 is attached to every function defined in this file; group #1 is
; attached to the intrinsic declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }