/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include "mldev_utils_scalar.h"

/* Description:
 * This file implements scalar versions of the Machine Learning utility functions used to convert
 * data between higher-precision and lower-precision types, except bfloat16.
 */

int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int8_t *output_buffer;
	uint64_t i;
	int32_t i32;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT8_MIN)
			i32 = INT8_MIN;

		if (i32 > INT8_MAX)
			i32 = INT8_MAX;

		*output_buffer = (int8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint8_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT8_MAX)
			i32 = UINT8_MAX;

		*output_buffer = (uint8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT16_MIN)
			i32 = INT16_MIN;

		if (i32 > INT16_MAX)
			i32 = INT16_MAX;

		*output_buffer = (int16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

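/*
 * Hedged usage sketch (not part of the upstream file, compiled out by default):
 * round-trips a small float32 buffer through int8 quantization using the
 * helpers defined above. The MLDEV_UTILS_SCALAR_EXAMPLE guard, the function
 * name and the buffer contents are illustrative assumptions, not DPDK API.
 */
#ifdef MLDEV_UTILS_SCALAR_EXAMPLE
static int
mldev_scalar_int8_round_trip_example(void)
{
	float in[4] = {0.25f, -0.5f, 1.0f, -1.0f};
	int8_t q[4];
	float out[4];
	int ret;

	/* Quantize: multiply by scale, round to the nearest integer and
	 * saturate to [INT8_MIN, INT8_MAX].
	 */
	ret = rte_ml_io_float32_to_int8(127.0f, 4, in, q);
	if (ret != 0)
		return ret;

	/* Dequantize: multiply each int8 element by the inverse scale. */
	return rte_ml_io_int8_to_float32(1.0f / 127.0f, 4, q, out);
}
#endif /* MLDEV_UTILS_SCALAR_EXAMPLE */
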
int
rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT16_MAX)
			i32 = UINT16_MAX;

		*output_buffer = (uint16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

/* Convert a single precision floating point number (float32) into a half precision
 * floating point number (float16) using round to nearest rounding mode.
 */
static uint16_t
__float32_to_float16_scalar_rtn(float x)
{
	union float32 f32; /* float32 input */
	uint32_t f32_s;    /* float32 sign */
	uint32_t f32_e;    /* float32 exponent */
	uint32_t f32_m;    /* float32 mantissa */
	uint16_t f16_s;    /* float16 sign */
	uint16_t f16_e;    /* float16 exponent */
	uint16_t f16_m;    /* float16 mantissa */
	uint32_t tbits;    /* number of truncated bits */
	uint32_t tmsb;     /* MSB position of truncated bits */
	uint32_t m_32;     /* temporary float32 mantissa */
	uint16_t m_16;     /* temporary float16 mantissa */
	uint16_t u16;      /* float16 output */
	int be_16;         /* float16 biased exponent, signed */

	f32.f = x;
	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;

	f16_s = f32_s;
	f16_e = 0;
	f16_m = 0;

	switch (f32_e) {
	case (0): /* float32: zero or subnormal number */
		f16_e = 0;
		f16_m = 0; /* convert to zero */
		break;
	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
		f16_e = FP16_MASK_E >> FP16_LSB_E;
		if (f32_m == 0) { /* infinity */
			f16_m = 0;
		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
			f16_m |= BIT(FP16_MSB_M);
		}
		break;
	default: /* float32: normal number */
		/* compute biased exponent for float16 */
		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;

		/* overflow, be_16 = [31:INF], set to infinity */
		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
			f16_e = FP16_MASK_E >> FP16_LSB_E;
			f16_m = 0;
		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
			/* normal float16, be_16 = [1:30] */
			f16_e = be_16;
			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
				/* round up: truncated bits are above the halfway point */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
				/* tie: round up only if LSB of m_16 is set (round to even) */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
			/* underflow: zero / subnormal, be_16 = [-9:0] */
			f16_e = 0;

			/* make the implicit leading 1 of the float32 mantissa explicit */
			m_32 = f32_m | BIT(FP32_LSB_E);
			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
			m_16 = m_32 >> tbits;

			/* round up: truncated bits are above the halfway point */
			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
				/* tie: round up only if LSB of m_16 is set (round to even) */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
			/* underflow: be_16 = [-10], rounds to zero or the smallest subnormal */
			f16_e = 0;
			if (f32_m != 0)
				f16_m = 1;
			else
				f16_m = 0;
		} else {
			/* underflow: zero, be_16 = [-INF:-11] */
			f16_e = 0;
			f16_m = 0;
		}

		break;
	}

	u16 = FP16_PACK(f16_s, f16_e, f16_m);

	return u16;
}

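/*
 * Hedged sanity-check sketch (not part of the upstream file, compiled out by
 * default): illustrates the round-to-nearest behaviour (ties broken to the
 * even mantissa) of the helper above on well-known IEEE 754 binary16
 * encodings. The MLDEV_UTILS_SCALAR_SELFTEST guard and the function name are
 * illustrative assumptions.
 */
#ifdef MLDEV_UTILS_SCALAR_SELFTEST
static int
mldev_scalar_float16_rounding_selftest(void)
{
	/* 1.0f encodes as sign 0, biased exponent 15, mantissa 0 -> 0x3C00 */
	if (__float32_to_float16_scalar_rtn(1.0f) != 0x3C00)
		return -1;

	/* 65536.0f exceeds the largest finite float16 (65504.0f) -> +infinity */
	if (__float32_to_float16_scalar_rtn(65536.0f) != 0x7C00)
		return -1;

	/* 1.0f + 2^-11 lies exactly halfway between 1.0 and the next float16
	 * value; the tie is broken towards the even mantissa, giving 1.0.
	 */
	if (__float32_to_float16_scalar_rtn(1.0f + 0x1p-11f) != 0x3C00)
		return -1;

	return 0;
}
#endif /* MLDEV_UTILS_SCALAR_SELFTEST */
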
int
rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float32_to_float16_scalar_rtn(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

/* Convert a half precision floating point number (float16) into a single precision
 * floating point number (float32).
 */
static float
__float16_to_float32_scalar_rtx(uint16_t f16)
{
	union float32 f32; /* float32 output */
	uint16_t f16_s;    /* float16 sign */
	uint16_t f16_e;    /* float16 exponent */
	uint16_t f16_m;    /* float16 mantissa */
	uint32_t f32_s;    /* float32 sign */
	uint32_t f32_e;    /* float32 exponent */
	uint32_t f32_m;    /* float32 mantissa */
	uint8_t shift;     /* number of bits to be shifted */
	uint32_t clz;      /* count of leading zeroes */
	int e_16;          /* unbiased float16 exponent */

	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;

	f32_s = f16_s;
	switch (f16_e) {
	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
		f32_e = FP32_MASK_E >> FP32_LSB_E;
		if (f16_m == 0x0) { /* infinity */
			f32_m = f16_m;
		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
			f32_m = f16_m;
			shift = FP32_MSB_M - FP16_MSB_M;
			f32_m = (f32_m << shift) & FP32_MASK_M;
			f32_m |= BIT(FP32_MSB_M);
		}
		break;
	case 0: /* float16: zero or subnormal */
		f32_m = f16_m;
		if (f16_m == 0) { /* signed zero */
			f32_e = 0;
		} else { /* subnormal numbers */
			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
			e_16 = (int)f16_e - clz;
			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
			f32_m = (f32_m << shift) & FP32_MASK_M;
		}
		break;
	default: /* normal numbers */
		f32_m = f16_m;
		e_16 = (int)f16_e;
		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

		shift = (FP32_MSB_M - FP16_MSB_M);
		f32_m = (f32_m << shift) & FP32_MASK_M;
	}

	f32.u = FP32_PACK(f32_s, f32_e, f32_m);

	return f32.f;
}

int
rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float16_to_float32_scalar_rtx(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

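/*
 * Hedged usage sketch (not part of the upstream file, compiled out by default):
 * round-trips a float32 buffer through float16 and back with the two public
 * helpers above. The MLDEV_UTILS_SCALAR_EXAMPLE guard, the function name and
 * the buffer contents are illustrative assumptions, not DPDK API.
 */
#ifdef MLDEV_UTILS_SCALAR_EXAMPLE
static int
mldev_scalar_float16_round_trip_example(void)
{
	float in[3] = {0.0f, 1.5f, -65504.0f};
	uint16_t half[3];
	float out[3];
	int ret;

	/* Narrow to float16 using round to nearest. */
	ret = rte_ml_io_float32_to_float16(3, in, half);
	if (ret != 0)
		return ret;

	/* Expand back to float32; values exactly representable in float16,
	 * such as these, survive the round trip unchanged.
	 */
	return rte_ml_io_float16_to_float32(3, half, out);
}
#endif /* MLDEV_UTILS_SCALAR_EXAMPLE */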