xref: /dpdk/lib/mldev/mldev_utils_scalar.c (revision de1f01a8eabd1da08d85e77ff99ba85e03cfd1ad)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2022 Marvell.
3  */
4 
5 #include "mldev_utils_scalar.h"
6 
7 /* Description:
8  * This file implements scalar versions of Machine Learning utility functions used to convert data
9  * types from higher precision to lower precision and vice-versa, except bfloat16.
10  */
11 
12 int
13 rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
14 {
15 	float *input_buffer;
16 	int8_t *output_buffer;
17 	uint64_t i;
18 	int i32;
19 
20 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
21 		return -EINVAL;
22 
23 	input_buffer = (float *)input;
24 	output_buffer = (int8_t *)output;
25 
26 	for (i = 0; i < nb_elements; i++) {
27 		i32 = (int32_t)round((*input_buffer) * scale);
28 
29 		if (i32 < INT8_MIN)
30 			i32 = INT8_MIN;
31 
32 		if (i32 > INT8_MAX)
33 			i32 = INT8_MAX;
34 
35 		*output_buffer = (int8_t)i32;
36 
37 		input_buffer++;
38 		output_buffer++;
39 	}
40 
41 	return 0;
42 }
43 
/**
 * Dequantize a buffer of int8 values to float32.
 *
 * Each element is widened to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of int8_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const int8_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
66 
67 int
68 rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
69 {
70 	float *input_buffer;
71 	uint8_t *output_buffer;
72 	int32_t i32;
73 	uint64_t i;
74 
75 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
76 		return -EINVAL;
77 
78 	input_buffer = (float *)input;
79 	output_buffer = (uint8_t *)output;
80 
81 	for (i = 0; i < nb_elements; i++) {
82 		i32 = (int32_t)round((*input_buffer) * scale);
83 
84 		if (i32 < 0)
85 			i32 = 0;
86 
87 		if (i32 > UINT8_MAX)
88 			i32 = UINT8_MAX;
89 
90 		*output_buffer = (uint8_t)i32;
91 
92 		input_buffer++;
93 		output_buffer++;
94 	}
95 
96 	return 0;
97 }
98 
/**
 * Dequantize a buffer of uint8 values to float32.
 *
 * Each element is widened to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of uint8_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const uint8_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
121 
122 int
123 rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
124 {
125 	float *input_buffer;
126 	int16_t *output_buffer;
127 	int32_t i32;
128 	uint64_t i;
129 
130 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
131 		return -EINVAL;
132 
133 	input_buffer = (float *)input;
134 	output_buffer = (int16_t *)output;
135 
136 	for (i = 0; i < nb_elements; i++) {
137 		i32 = (int32_t)round((*input_buffer) * scale);
138 
139 		if (i32 < INT16_MIN)
140 			i32 = INT16_MIN;
141 
142 		if (i32 > INT16_MAX)
143 			i32 = INT16_MAX;
144 
145 		*output_buffer = (int16_t)i32;
146 
147 		input_buffer++;
148 		output_buffer++;
149 	}
150 
151 	return 0;
152 }
153 
/**
 * Dequantize a buffer of int16 values to float32.
 *
 * Each element is widened to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of int16_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const int16_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
176 
177 int
178 rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
179 {
180 	float *input_buffer;
181 	uint16_t *output_buffer;
182 	int32_t i32;
183 	uint64_t i;
184 
185 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
186 		return -EINVAL;
187 
188 	input_buffer = (float *)input;
189 	output_buffer = (uint16_t *)output;
190 
191 	for (i = 0; i < nb_elements; i++) {
192 		i32 = (int32_t)round((*input_buffer) * scale);
193 
194 		if (i32 < 0)
195 			i32 = 0;
196 
197 		if (i32 > UINT16_MAX)
198 			i32 = UINT16_MAX;
199 
200 		*output_buffer = (uint16_t)i32;
201 
202 		input_buffer++;
203 		output_buffer++;
204 	}
205 
206 	return 0;
207 }
208 
/**
 * Dequantize a buffer of uint16 values to float32.
 *
 * Each element is widened to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of uint16_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const uint16_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
231 
232 int
233 rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
234 {
235 	float *input_buffer;
236 	int32_t *output_buffer;
237 	uint64_t i;
238 
239 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
240 		return -EINVAL;
241 
242 	input_buffer = (float *)input;
243 	output_buffer = (int32_t *)output;
244 
245 	for (i = 0; i < nb_elements; i++) {
246 		*output_buffer = (int32_t)round((*input_buffer) * scale);
247 
248 		input_buffer++;
249 		output_buffer++;
250 	}
251 
252 	return 0;
253 }
254 
/**
 * Dequantize a buffer of int32 values to float32.
 *
 * Each element is converted to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of int32_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const int32_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
277 
278 int
279 rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
280 {
281 	float *input_buffer;
282 	uint32_t *output_buffer;
283 	int32_t i32;
284 	uint64_t i;
285 
286 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
287 		return -EINVAL;
288 
289 	input_buffer = (float *)input;
290 	output_buffer = (uint32_t *)output;
291 
292 	for (i = 0; i < nb_elements; i++) {
293 		i32 = (int32_t)round((*input_buffer) * scale);
294 
295 		if (i32 < 0)
296 			i32 = 0;
297 
298 		*output_buffer = (uint32_t)i32;
299 
300 		input_buffer++;
301 		output_buffer++;
302 	}
303 
304 	return 0;
305 }
306 
/**
 * Dequantize a buffer of uint32 values to float32.
 *
 * Each element is converted to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of uint32_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const uint32_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
329 
330 int
331 rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
332 {
333 	float *input_buffer;
334 	int64_t *output_buffer;
335 	uint64_t i;
336 
337 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
338 		return -EINVAL;
339 
340 	input_buffer = (float *)input;
341 	output_buffer = (int64_t *)output;
342 
343 	for (i = 0; i < nb_elements; i++) {
344 		*output_buffer = (int64_t)round((*input_buffer) * scale);
345 
346 		input_buffer++;
347 		output_buffer++;
348 	}
349 
350 	return 0;
351 }
352 
/**
 * Dequantize a buffer of int64 values to float32.
 *
 * Each element is converted to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of int64_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const int64_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
375 
376 int
377 rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
378 {
379 	float *input_buffer;
380 	uint64_t *output_buffer;
381 	int64_t i64;
382 	uint64_t i;
383 
384 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
385 		return -EINVAL;
386 
387 	input_buffer = (float *)input;
388 	output_buffer = (uint64_t *)output;
389 
390 	for (i = 0; i < nb_elements; i++) {
391 		i64 = (int64_t)round((*input_buffer) * scale);
392 
393 		if (i64 < 0)
394 			i64 = 0;
395 
396 		*output_buffer = (uint64_t)i64;
397 
398 		input_buffer++;
399 		output_buffer++;
400 	}
401 
402 	return 0;
403 }
404 
/**
 * Dequantize a buffer of uint64 values to float32.
 *
 * Each element is converted to float and multiplied by @p scale.
 *
 * @param scale Dequantization scale, must be non-zero.
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of uint64_t values.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	const uint64_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (scale == 0 || nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = scale * (float)src[idx];

	return 0;
}
427 
/* Convert a single precision floating point number (float32) into a half precision
 * floating point number (float16) using round-to-nearest rounding mode
 * (ties to even: a halfway value is rounded up only when the kept LSB is 1).
 *
 * Behavior established by the code below:
 *  - float32 zero and subnormal inputs are flushed to (signed) zero;
 *  - infinity maps to float16 infinity; NaN propagates the top mantissa bits
 *    and forces the mantissa MSB to 1 (quiet NaN);
 *  - normal numbers are re-biased, rounded, and may overflow to infinity or
 *    underflow to a float16 subnormal or zero.
 *
 * The FP32_* / FP16_* masks, shifts and biases, and the BIT()/GENMASK_U32()
 * helpers come from mldev_utils_scalar.h.
 */
static uint16_t
__float32_to_float16_scalar_rtn(float x)
{
	union float32 f32; /* float32 input */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t tbits;	   /* number of truncated bits */
	uint32_t tmsb;	   /* MSB position of truncated bits */
	uint32_t m_32;	   /* temporary float32 mantissa */
	uint16_t m_16;	   /* temporary float16 mantissa */
	uint16_t u16;	   /* float16 output */
	int be_16;	   /* float16 biased exponent, signed */

	/* Decompose the input into sign / biased exponent / mantissa fields. */
	f32.f = x;
	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;

	f16_s = f32_s;
	f16_e = 0;
	f16_m = 0;

	switch (f32_e) {
	case (0): /* float32: zero or subnormal number */
		f16_e = 0;
		f16_m = 0; /* convert to zero */
		break;
	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
		f16_e = FP16_MASK_E >> FP16_LSB_E;
		if (f32_m == 0) { /* infinity */
			f16_m = 0;
		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
			f16_m |= BIT(FP16_MSB_M);
		}
		break;
	default: /* float32: normal number */
		/* compute biased exponent for float16 */
		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;

		/* overflow, be_16 = [31-INF], set to infinity */
		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
			f16_e = FP16_MASK_E >> FP16_LSB_E;
			f16_m = 0;
		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
			/* normal float16, be_16 = [1:30]*/
			f16_e = be_16;
			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
				/* round: non-zero truncated bits except MSB */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
				/* round: MSB of truncated bits and LSB of m_16 is set
				 * (exact halfway case — round up only to an even mantissa)
				 */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
			/* underflow: zero / subnormal, be_16 = [-9:0] */
			f16_e = 0;

			/* add implicit leading zero */
			m_32 = f32_m | BIT(FP32_LSB_E);
			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
			m_16 = m_32 >> tbits;

			/* if non-leading truncated bits are set */
			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
				/* if leading truncated bit is set (halfway case, ties to even) */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
			/* underflow: zero, be_16 = [-10] */
			f16_e = 0;
			if (f32_m != 0)
				f16_m = 1;
			else
				f16_m = 0;
		} else {
			/* underflow: zero, be_16 = [-INF:-11] */
			f16_e = 0;
			f16_m = 0;
		}

		break;
	}

	/* Reassemble sign, exponent and mantissa into the float16 bit pattern. */
	u16 = FP16_PACK(f16_s, f16_e, f16_m);

	return u16;
}
549 
/**
 * Convert a buffer of float32 values to float16 (IEEE binary16) bit patterns.
 *
 * Each element is converted with __float32_to_float16_scalar_rtn().
 *
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of float values.
 * @param output Destination buffer of uint16_t float16 bit patterns.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
{
	const float *src = input;
	uint16_t *dst = output;
	uint64_t idx;

	if (nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = __float32_to_float16_scalar_rtn(src[idx]);

	return 0;
}
572 
/* Convert a half precision floating point number (float16) into a single precision
 * floating point number (float32). The conversion is exact (every float16 value
 * is representable in float32), so no rounding is involved.
 *
 * Behavior established by the code below:
 *  - infinity maps to float32 infinity; NaN propagates the mantissa and forces
 *    the float32 mantissa MSB to 1 (quiet NaN);
 *  - signed zero maps to signed zero;
 *  - float16 subnormals are normalized: the leading one is located with
 *    rte_clz32() and the exponent adjusted accordingly;
 *  - normal numbers are re-biased and the mantissa left-aligned.
 *
 * The FP32_* / FP16_* masks, shifts and biases, and BIT()/FP32_PACK() come
 * from mldev_utils_scalar.h.
 */
static float
__float16_to_float32_scalar_rtx(uint16_t f16)
{
	union float32 f32; /* float32 output */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa*/
	uint8_t shift;	   /* number of bits to be shifted */
	uint32_t clz;	   /* count of leading zeroes */
	int e_16;	   /* float16 exponent unbiased */

	/* Decompose the input into sign / biased exponent / mantissa fields. */
	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;

	f32_s = f16_s;
	switch (f16_e) {
	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
		f32_e = FP32_MASK_E >> FP32_LSB_E;
		if (f16_m == 0x0) { /* infinity */
			f32_m = f16_m;
		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
			f32_m = f16_m;
			shift = FP32_MSB_M - FP16_MSB_M;
			f32_m = (f32_m << shift) & FP32_MASK_M;
			f32_m |= BIT(FP32_MSB_M);
		}
		break;
	case 0: /* float16: zero or sub-normal */
		f32_m = f16_m;
		if (f16_m == 0) { /* zero signed */
			f32_e = 0;
		} else { /* subnormal numbers */
			/* clz counts zeroes above the leading one of the 10-bit
			 * mantissa, used to normalize the subnormal value.
			 */
			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
			e_16 = (int)f16_e - clz;
			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

			/* shift out the leading one (it becomes implicit). */
			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
			f32_m = (f32_m << shift) & FP32_MASK_M;
		}
		break;
	default: /* normal numbers */
		f32_m = f16_m;
		e_16 = (int)f16_e;
		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

		shift = (FP32_MSB_M - FP16_MSB_M);
		f32_m = (f32_m << shift) & FP32_MASK_M;
	}

	/* Reassemble sign, exponent and mantissa into the float32 bit pattern. */
	f32.u = FP32_PACK(f32_s, f32_e, f32_m);

	return f32.f;
}
633 
/**
 * Convert a buffer of float16 (IEEE binary16) bit patterns to float32 values.
 *
 * Each element is converted with __float16_to_float32_scalar_rtx().
 *
 * @param nb_elements Number of elements to convert, must be non-zero.
 * @param input Source buffer of uint16_t float16 bit patterns.
 * @param output Destination buffer of float values.
 * @return 0 on success, -EINVAL on invalid arguments.
 */
int
rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
{
	const uint16_t *src = input;
	float *dst = output;
	uint64_t idx;

	if (nb_elements == 0 || input == NULL || output == NULL)
		return -EINVAL;

	for (idx = 0; idx < nb_elements; idx++)
		dst[idx] = __float16_to_float32_scalar_rtx(src[idx]);

	return 0;
}
656