/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include "mldev_utils_scalar.h"

/* Description:
 * This file implements scalar versions of Machine Learning utility functions used to convert data
 * types from higher precision to lower precision and vice-versa, except bfloat16.
 */

int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int8_t *output_buffer;
	uint64_t i;
	int32_t i32;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		/* quantize: scale, round to nearest and saturate to the int8 range */
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT8_MIN)
			i32 = INT8_MIN;

		if (i32 > INT8_MAX)
			i32 = INT8_MAX;

		*output_buffer = (int8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		/* dequantize: widen and multiply by the dequantization scale */
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint8_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT8_MAX)
			i32 = UINT8_MAX;

		*output_buffer = (uint8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT16_MIN)
			i32 = INT16_MIN;

		if (i32 > INT16_MAX)
			i32 = INT16_MAX;

		*output_buffer = (int16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
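
/* Usage sketch for the quantize/dequantize pairs in this file. The
 * MLDEV_SCALAR_USAGE_EXAMPLE guard and the helper below are hypothetical
 * and exist only for illustration; they are not part of the DPDK build.
 * Note the scale semantics: both directions multiply by their scale
 * argument, so a round trip needs reciprocal scales.
 */
#ifdef MLDEV_SCALAR_USAGE_EXAMPLE
static int
example_int8_round_trip(void)
{
	float in[4] = {-1.5f, -0.4f, 0.4f, 1.5f};
	int8_t q[4];
	float out[4];
	float scale = 64.0f; /* maps roughly [-2.0, 2.0] onto the int8 range */
	int ret;

	/* quantize: q[i] = saturate(round(in[i] * scale)) */
	ret = rte_ml_io_float32_to_int8(scale, 4, in, q);
	if (ret != 0)
		return ret;

	/* dequantize with the reciprocal scale to recover approximate inputs */
	return rte_ml_io_int8_to_float32(1.0f / scale, 4, q, out);
}
#endif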

int
rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT16_MAX)
			i32 = UINT16_MAX;

		*output_buffer = (uint16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int32_t *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = (int32_t)round((*input_buffer) * scale);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint32_t *output_buffer;
	int64_t i64;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		/* use a 64-bit intermediate so values above INT32_MAX saturate
		 * to UINT32_MAX instead of overflowing a signed 32-bit cast
		 */
		i64 = (int64_t)round((*input_buffer) * scale);

		if (i64 < 0)
			i64 = 0;

		if (i64 > UINT32_MAX)
			i64 = UINT32_MAX;

		*output_buffer = (uint32_t)i64;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
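
/* Layout reference for the float16 helpers below: IEEE 754 binary16 packs
 * a value as 1 sign bit, 5 exponent bits (bias 15) and 10 mantissa bits;
 * binary32 uses 1 sign, 8 exponent (bias 127) and 23 mantissa bits.
 * For example, 1.0f encodes as 0x3F800000 in float32 and 0x3C00 in float16.
 */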

/* Convert a single precision floating point number (float32) into a half precision
 * floating point number (float16) using round to nearest rounding mode.
 */
static uint16_t
__float32_to_float16_scalar_rtn(float x)
{
	union float32 f32; /* float32 input */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t tbits;	   /* number of truncated bits */
	uint32_t tmsb;	   /* MSB position of truncated bits */
	uint32_t m_32;	   /* temporary float32 mantissa */
	uint16_t m_16;	   /* temporary float16 mantissa */
	uint16_t u16;	   /* float16 output */
	int be_16;	   /* float16 biased exponent, signed */

	f32.f = x;
	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;

	f16_s = f32_s;
	f16_e = 0;
	f16_m = 0;

	switch (f32_e) {
	case (0): /* float32: zero or subnormal number */
		f16_e = 0;
		f16_m = 0; /* convert to zero */
		break;
	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
		f16_e = FP16_MASK_E >> FP16_LSB_E;
		if (f32_m == 0) { /* infinity */
			f16_m = 0;
		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
			f16_m |= BIT(FP16_MSB_M);
		}
		break;
	default: /* float32: normal number */
		/* compute biased exponent for float16 */
		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;

		/* overflow, be_16 = [31:INF], set to infinity */
		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
			f16_e = FP16_MASK_E >> FP16_LSB_E;
			f16_m = 0;
		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
			/* normal float16, be_16 = [1:30] */
			f16_e = be_16;
			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
				/* round up: truncated bits exceed half of the LSB */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
				/* tie: truncated bits are exactly half, round to even */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
			/* underflow: zero / subnormal, be_16 = [-9:0] */
			f16_e = 0;

			/* make the implicit leading one explicit */
			m_32 = f32_m | BIT(FP32_LSB_E);
			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
			m_16 = m_32 >> tbits;

			/* round up: truncated bits exceed half of the LSB */
			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
				/* tie: truncated bits are exactly half, round to even */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
			/* underflow: zero, be_16 = [-10] */
			f16_e = 0;
			if (f32_m != 0)
				f16_m = 1;
			else
				f16_m = 0;
		} else {
			/* underflow: zero, be_16 = [-INF:-11] */
			f16_e = 0;
			f16_m = 0;
		}

		break;
	}

	u16 = FP16_PACK(f16_s, f16_e, f16_m);

	return u16;
}
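
/* Worked rounding examples for the helper above (round to nearest even;
 * the float16 mantissa step near 1.0 is 2^-10):
 *   1.0f + 2^-11 lies exactly halfway between 1.0 and 1.0 + 2^-10 and the
 *   kept mantissa is even, so it rounds down to 0x3C00 (1.0).
 *   1.0f + 2^-10 + 2^-11 is halfway with an odd kept mantissa, so it
 *   rounds up to 0x3C02 (1.0 + 2^-9).
 */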

int
rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float32_to_float16_scalar_rtn(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

/* Convert a half precision floating point number (float16) into a single precision
 * floating point number (float32).
 */
static float
__float16_to_float32_scalar_rtx(uint16_t f16)
{
	union float32 f32; /* float32 output */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint8_t shift;	   /* number of bits to be shifted */
	uint32_t clz;	   /* count of leading zeroes */
	int e_16;	   /* float16 exponent unbiased */

	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;

	f32_s = f16_s;
	switch (f16_e) {
	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
		f32_e = FP32_MASK_E >> FP32_LSB_E;
		if (f16_m == 0x0) { /* infinity */
			f32_m = f16_m;
		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
			f32_m = f16_m;
			shift = FP32_MSB_M - FP16_MSB_M;
			f32_m = (f32_m << shift) & FP32_MASK_M;
			f32_m |= BIT(FP32_MSB_M);
		}
		break;
	case 0: /* float16: zero or sub-normal */
		f32_m = f16_m;
		if (f16_m == 0) { /* zero signed */
			f32_e = 0;
		} else { /* subnormal numbers */
			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
			e_16 = (int)f16_e - (int)clz;
			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
			f32_m = (f32_m << shift) & FP32_MASK_M;
		}
		break;
	default: /* normal numbers */
		f32_m = f16_m;
		e_16 = (int)f16_e;
		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

		shift = (FP32_MSB_M - FP16_MSB_M);
		f32_m = (f32_m << shift) & FP32_MASK_M;
	}

	f32.u = FP32_PACK(f32_s, f32_e, f32_m);

	return f32.f;
}

int
rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float16_to_float32_scalar_rtx(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
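
/* Usage sketch for the float16 conversion pair above. As before, the
 * MLDEV_SCALAR_USAGE_EXAMPLE guard and helper are hypothetical and only
 * illustrate the API; float16 carries roughly three decimal digits, so
 * out[] only approximates in[].
 */
#ifdef MLDEV_SCALAR_USAGE_EXAMPLE
static int
example_float16_round_trip(void)
{
	float in[3] = {0.1f, 1.0f, 65504.0f}; /* 65504 is the largest finite float16 */
	uint16_t half[3];
	float out[3];
	int ret;

	ret = rte_ml_io_float32_to_float16(3, in, half);
	if (ret != 0)
		return ret;

	return rte_ml_io_float16_to_float32(3, half, out);
}
#endif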