/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include "mldev_utils_scalar.h"

/* Description:
 * This file implements scalar versions of Machine Learning utility functions used to convert data
 * types from higher precision to lower precision and vice-versa, except bfloat16.
 */
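/* Usage sketch (illustrative only, not part of this library): quantize a small
 * float32 buffer to int8 and dequantize it back. The scale values below are
 * hypothetical; a real application derives them from the model's quantization
 * parameters. With qscale = 127.0f, 0.5f maps to round(63.5) = 64, and
 * dequantizing with dscale = 1.0f / 127.0f yields ~0.5039f.
 *
 *	float in[2] = {0.5f, -1.0f};
 *	int8_t q[2];
 *	float out[2];
 *
 *	rte_ml_io_float32_to_int8(127.0f, 2, in, q);
 *	rte_ml_io_int8_to_float32(1.0f / 127.0f, 2, q, out);
 */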
int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int8_t *output_buffer;
	uint64_t i;
	int32_t i32;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT8_MIN)
			i32 = INT8_MIN;

		if (i32 > INT8_MAX)
			i32 = INT8_MAX;

		*output_buffer = (int8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint8_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT8_MAX)
			i32 = UINT8_MAX;

		*output_buffer = (uint8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
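/* Note on saturation (worked example): quantized values are rounded half away
 * from zero by round() and then clamped to the output range. With scale = 1.0f,
 * an input of 300.7f becomes round(300.7) = 301, which saturates to 127 for
 * int8 and to 255 for uint8; an input of -3.2f becomes -3, which saturates to
 * 0 for the unsigned outputs.
 */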
int
rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT16_MIN)
			i32 = INT16_MIN;

		if (i32 > INT16_MAX)
			i32 = INT16_MAX;

		*output_buffer = (int16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT16_MAX)
			i32 = UINT16_MAX;

		*output_buffer = (uint16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int32_t *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = (int32_t)round((*input_buffer) * scale);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint32_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		*output_buffer = (uint32_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int64_t *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int64_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = (int64_t)round((*input_buffer) * scale);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int64_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int64_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint64_t *output_buffer;
	int64_t i64;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint64_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i64 = (int64_t)round((*input_buffer) * scale);

		if (i64 < 0)
			i64 = 0;

		*output_buffer = (uint64_t)i64;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

int
rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint64_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint64_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

/* Convert a single precision floating point number (float32) into a half precision
 * floating point number (float16) using round to nearest rounding mode.
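 *
 * Worked example (a sketch, assuming the standard IEEE 754 binary16 layout):
 * 1.0f is 0x3F800000, i.e. biased exponent 127 and zero mantissa; the float16
 * biased exponent becomes 127 - 127 + 15 = 15 with a zero mantissa, which
 * packs to 0x3C00.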
 */
static uint16_t
__float32_to_float16_scalar_rtn(float x)
{
	union float32 f32; /* float32 input */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t tbits;	   /* number of truncated bits */
	uint32_t tmsb;	   /* MSB position of truncated bits */
	uint32_t m_32;	   /* temporary float32 mantissa */
	uint16_t m_16;	   /* temporary float16 mantissa */
	uint16_t u16;	   /* float16 output */
	int be_16;	   /* float16 biased exponent, signed */

	f32.f = x;
	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;

	f16_s = f32_s;
	f16_e = 0;
	f16_m = 0;

	switch (f32_e) {
	case (0): /* float32: zero or subnormal number */
		f16_e = 0;
		f16_m = 0; /* convert to zero */
		break;
	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
		f16_e = FP16_MASK_E >> FP16_LSB_E;
		if (f32_m == 0) { /* infinity */
			f16_m = 0;
		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
			f16_m |= BIT(FP16_MSB_M);
		}
		break;
	default: /* float32: normal number */
		/* compute biased exponent for float16 */
		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;

		/* overflow, be_16 = [31:INF], set to infinity */
		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
			f16_e = FP16_MASK_E >> FP16_LSB_E;
			f16_m = 0;
		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
			/* normal float16, be_16 = [1:30] */
			f16_e = be_16;
			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
				/* round up: truncated bits are above the halfway point */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
				/* truncated bits are exactly halfway: round to even */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
			/* underflow: zero / subnormal, be_16 = [-9:0] */
			f16_e = 0;

			/* add implicit leading one */
			m_32 = f32_m | BIT(FP32_LSB_E);
			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
			m_16 = m_32 >> tbits;

			/* round up: truncated bits are above the halfway point */
			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
				/* truncated bits are exactly halfway: round to even */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
			/* underflow: zero, be_16 = [-10] */
			f16_e = 0;
			if (f32_m != 0)
				f16_m = 1;
			else
				f16_m = 0;
		} else {
			/* underflow: zero, be_16 = [-INF:-11] */
			f16_e = 0;
			f16_m = 0;
		}

		break;
	}

	u16 = FP16_PACK(f16_s, f16_e, f16_m);

	return u16;
}

int
rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float32_to_float16_scalar_rtn(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}

/* Convert a half precision floating point number (float16) into a single precision
 * floating point number (float32).
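 *
 * Worked example (a sketch, assuming IEEE 754 binary16 input): 0x3C00 (biased
 * exponent 15, zero mantissa) maps back to 1.0f; the subnormal 0x0001
 * represents 2^-24 and expands to the float32 value ~5.9604645e-8f.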
 */
static float
__float16_to_float32_scalar_rtx(uint16_t f16)
{
	union float32 f32; /* float32 output */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint8_t shift;	   /* number of bits to be shifted */
	uint32_t clz;	   /* count of leading zeroes */
	int e_16;	   /* float16 exponent unbiased */

	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;

	f32_s = f16_s;
	switch (f16_e) {
	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
		f32_e = FP32_MASK_E >> FP32_LSB_E;
		if (f16_m == 0x0) { /* infinity */
			f32_m = f16_m;
		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
			f32_m = f16_m;
			shift = FP32_MSB_M - FP16_MSB_M;
			f32_m = (f32_m << shift) & FP32_MASK_M;
			f32_m |= BIT(FP32_MSB_M);
		}
		break;
	case 0: /* float16: zero or sub-normal */
		f32_m = f16_m;
		if (f16_m == 0) { /* signed zero */
			f32_e = 0;
		} else { /* subnormal numbers */
			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
			e_16 = (int)f16_e - clz;
			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
			f32_m = (f32_m << shift) & FP32_MASK_M;
		}
		break;
	default: /* normal numbers */
		f32_m = f16_m;
		e_16 = (int)f16_e;
		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

		shift = (FP32_MSB_M - FP16_MSB_M);
		f32_m = (f32_m << shift) & FP32_MASK_M;
	}

	f32.u = FP32_PACK(f32_s, f32_e, f32_m);

	return f32.f;
}

int
rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float16_to_float32_scalar_rtx(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
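/* Usage sketch (illustrative only, not part of this library): round-trip a
 * buffer through float16 and back. Values exactly representable in binary16,
 * such as 1.5f below, survive the round trip unchanged; others, such as 0.1f,
 * are rounded to the nearest float16.
 *
 *	float in[2] = {1.5f, 0.1f};
 *	uint16_t f16[2];
 *	float out[2];
 *
 *	rte_ml_io_float32_to_float16(2, in, f16);
 *	rte_ml_io_float16_to_float32(2, f16, out);
 */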