; Tests f64 FMA selection on AMDGPU: plain v_fma_f64 on SI..GFX11 (SIGFX11
; prefix), v_fmac_f64_e32 on GFX90A, plus source-modifier folding (|abs|,
; -neg, -|abs|) and inline-constant (2.0) operands.
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s

declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

; FUNC-LABEL: {{^}}fma_f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                   ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v2f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load <2 x double>, ptr addrspace(1) %in1
  %r1 = load <2 x double>, ptr addrspace(1) %in2
  %r2 = load <2 x double>, ptr addrspace(1) %in3
  %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
  store <2 x double> %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v4f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load <4 x double>, ptr addrspace(1) %in1
  %r1 = load <4 x double>, ptr addrspace(1) %in2
  %r2 = load <4 x double>, ptr addrspace(1) %in3
  %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
  store <4 x double> %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; The fabs bars must be regex-escaped (\|): an unescaped | inside {{...}} is
; FileCheck regex alternation and would make this check trivially weak.
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r0
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r1
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; 2.0 is an inline constant; it may be commuted into the src1 slot.
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
define amdgpu_kernel void @fma_f64_lit_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
  store double %r3, ptr addrspace(1) %out
  ret void
}