; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,FUNC %s

; Code generation checks for the llvm.amdgcn.rcp intrinsic and for
; reciprocal-shaped IR (fdiv 1.0, x and rcp(sqrt(x))) on amdgcn.
; Each kernel stores its result to %out so the value stays live.

declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

declare double @llvm.amdgcn.sqrt.f64(double) #0
declare float @llvm.amdgcn.sqrt.f32(float) #0
declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; rcp of an undef operand: the checks expect the constant 0x7fc00000 to be
; materialized and stored, with no other use of that register in between.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
; SI-NOT: [[NAN]]
; SI: buffer_store_dword [[NAN]]
define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp of the constant 2.0: no v_rcp_f32 may be emitted before the 0.5 move.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp of the constant 10.0: expected to become a move of the literal
; 0x3dcccccd, again with no v_rcp_f32 ahead of it.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; fdiv 1.0, x carrying !fpmath !0 under attribute group #1
; (preserve-sign f32 denormals): a single v_rcp_f32 feeds the store.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; Same fdiv with !fpmath !0, but under attribute group #4 (ieee f32 denormals).
; NOTE(review): despite the "safe" name, group #4 sets "unsafe-fp-math"="true";
; confirm the name/attribute pairing is intentional.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src, !fpmath !0
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; fdiv 1.0, x without !fpmath under attribute group #3 (ieee f32 denormals):
; the checks expect the v_div_scale_f32 based division sequence.
; NOTE(review): despite the "unsafe" name, group #3 sets "unsafe-fp-math"="false";
; confirm the name/attribute pairing is intentional.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; contract rcp(contract llvm.sqrt.f32(x)): the checks expect a
; mul / v_rsq_f32 / mul / five-fma sequence followed by a separate v_rcp_f32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_mul_f32
; SI: v_rsq_f32
; SI: v_mul_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_rcp_f32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call contract float @llvm.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; contract rcp(contract llvm.amdgcn.sqrt.f32(x)): sqrt and rcp stay as two
; separate instructions.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call contract float @llvm.amdgcn.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; As above, but the sqrt call carries no contract flag; the expected output
; is the same sqrt + rcp pair.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrspace(1) %out, float %src) #1 {
  %sqrt = call float @llvm.amdgcn.sqrt.f32(float %src)
  %rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; rcp(llvm.sqrt.f32(x)) under attribute group #2 ("unsafe-fp-math"="true"):
; the checks expect v_sqrt_f32 followed by v_rcp_f32.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, ptr addrspace(1) %out, align 4
  ret void
}

; Plain f64 rcp intrinsic: one v_rcp_f64 whose result register pair is stored
; with no intervening use.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; Same f64 rcp intrinsic under attribute group #2; identical expected output.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; f64 fdiv 1.0, x under attribute group #1: expects the v_div_scale_f64 based
; division sequence.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; f64 fdiv 1.0, x under attribute group #2: expects v_rcp_f64 followed by six
; v_fma_f64 instructions.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp(llvm.sqrt.f64(x)) under attribute group #1: the checks expect a
; v_rsq_f64 / two-mul / six-fma sequence followed by a separate v_rcp_f64.
; A negative v_rsq_f64_e32 check placed directly ahead of the positive
; v_rsq_f64 match could never fire (the positive pattern would match the same
; text first), so none is used here.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI: v_rsq_f64
; SI: v_mul_f64
; SI: v_mul_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, ptr addrspace(1) %out, align 8
  ret void
}

; rcp(llvm.amdgcn.sqrt.f64(x)) under attribute group #1: v_sqrt_f64 then
; v_rcp_f64, with no v_rsq_f64_e32 ahead of the sqrt.
; FUNC-LABEL: {{^}}safe_amdgcn_sqrt_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
  %root = call double @llvm.amdgcn.sqrt.f64(double %src)
  %recip = call double @llvm.amdgcn.rcp.f64(double %root)
  store double %recip, ptr addrspace(1) %out, align 8
  ret void
}

; rcp(llvm.sqrt.f64(x)) under attribute group #2: the checks expect a
; v_rsq_f64 / two-mul / six-fma sequence, then v_rcp_f64 and the 64-bit store.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64
; SI: v_mul_f64
; SI: v_mul_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_rcp_f64
; SI: buffer_store_dwordx2
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %root = call double @llvm.sqrt.f64(double %src)
  %recip = call double @llvm.amdgcn.rcp.f64(double %root)
  store double %recip, ptr addrspace(1) %out, align 8
  ret void
}

; rcp(llvm.amdgcn.sqrt.f64(x)) under attribute group #2: the sqrt result
; register pair must be the rcp operand, and the rcp result must be stored.
; FUNC-LABEL: {{^}}unsafe_amdgcn_sqrt_rsq_rcp_pat_f64:
; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
  %root = call double @llvm.amdgcn.sqrt.f64(double %src)
  %recip = call double @llvm.amdgcn.rcp.f64(double %root)
  store double %recip, ptr addrspace(1) %out, align 8
  ret void
}

; Attribute groups shared by the declarations and kernels in this file:
;   #0  intrinsic declarations
;   #1  unsafe-fp-math off, f32 denormal mode preserve-sign
;   #2  unsafe-fp-math on,  f32 denormal mode preserve-sign
;   #3  unsafe-fp-math off, f32 denormal mode ieee
;   #4  unsafe-fp-math on,  f32 denormal mode ieee
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }

; Operand of the !fpmath annotations attached to the f32 fdiv instructions.
!0 = !{float 2.500000e+00}