1*f3087befSAndrew Turner /* 2*f3087befSAndrew Turner * Single-precision SVE powi(x, n) function. 3*f3087befSAndrew Turner * 4*f3087befSAndrew Turner * Copyright (c) 2020-2024, Arm Limited. 5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6*f3087befSAndrew Turner */ 7*f3087befSAndrew Turner 8*f3087befSAndrew Turner #include "sv_math.h" 9*f3087befSAndrew Turner 10*f3087befSAndrew Turner /* Optimized single-precision vector powi (float base, integer power). 11*f3087befSAndrew Turner powi is developed for environments in which accuracy is of much less 12*f3087befSAndrew Turner importance than performance, hence we provide no estimate for worst-case 13*f3087befSAndrew Turner error. */ 14*f3087befSAndrew Turner svfloat32_t 15*f3087befSAndrew Turner _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p) 16*f3087befSAndrew Turner { 17*f3087befSAndrew Turner /* Compute powi by successive squaring, right to left. */ 18*f3087befSAndrew Turner svfloat32_t acc = sv_f32 (1.f); 19*f3087befSAndrew Turner svbool_t want_recip = svcmplt (p, ns, 0); 20*f3087befSAndrew Turner svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns)); 21*f3087befSAndrew Turner 22*f3087befSAndrew Turner /* We use a max to avoid needing to check whether any lane != 0 on each 23*f3087befSAndrew Turner iteration. */ 24*f3087befSAndrew Turner uint32_t max_n = svmaxv (p, ns_abs); 25*f3087befSAndrew Turner 26*f3087befSAndrew Turner svfloat32_t c = as; 27*f3087befSAndrew Turner /* Successively square c, and use merging predication (_m) to determine 28*f3087befSAndrew Turner whether or not to perform the multiplication or keep the previous 29*f3087befSAndrew Turner iteration. */ 30*f3087befSAndrew Turner while (true) 31*f3087befSAndrew Turner { 32*f3087befSAndrew Turner svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1); 33*f3087befSAndrew Turner acc = svmul_m (px, acc, c); 34*f3087befSAndrew Turner max_n >>= 1; 35*f3087befSAndrew Turner if (max_n == 0) 36*f3087befSAndrew Turner break; 37*f3087befSAndrew Turner 38*f3087befSAndrew Turner ns_abs = svlsr_x (p, ns_abs, 1); 39*f3087befSAndrew Turner c = svmul_x (p, c, c); 40*f3087befSAndrew Turner } 41*f3087befSAndrew Turner 42*f3087befSAndrew Turner /* Negative powers are handled by computing the abs(n) version and then 43*f3087befSAndrew Turner taking the reciprocal. */ 44*f3087befSAndrew Turner if (svptest_any (want_recip, want_recip)) 45*f3087befSAndrew Turner acc = svdivr_m (want_recip, acc, 1.0f); 46*f3087befSAndrew Turner 47*f3087befSAndrew Turner return acc; 48*f3087befSAndrew Turner } 49*f3087befSAndrew Turner CLOSE_SVE_ATTR 50