/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2022 Marvell.
 */

#include "mldev_utils_scalar.h"

/* Description:
 * This file implements scalar versions of the Machine Learning utility functions used to
 * convert data types from higher precision to lower precision and vice versa, with the
 * exception of bfloat16.
 */
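
/* Note on scale semantics, as observed from the code below (not a documented
 * API contract): the float32 -> intN helpers multiply by scale before rounding
 * and saturating, while the intN -> float32 helpers multiply the integer by
 * scale directly. A round trip therefore typically uses reciprocal scales in
 * the two directions.
 */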
11 
int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int8_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT8_MIN)
			i32 = INT8_MIN;

		if (i32 > INT8_MAX)
			i32 = INT8_MAX;

		*output_buffer = (int8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
43 
int
rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
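
/* Usage sketch (illustrative only; the buffers, element count and scale choice
 * below are assumptions, not part of the API):
 *
 *	float in[4] = {-1.0f, -0.5f, 0.25f, 1.0f};
 *	int8_t q[4];
 *	float out[4];
 *	float scale = 127.0f;	/- 127 / absmax, with absmax = 1.0 -/
 *
 *	rte_ml_io_float32_to_int8(scale, 4, in, q);	/- q = {-127, -64, 32, 127} -/
 *	rte_ml_io_int8_to_float32(1.0f / scale, 4, q, out);	/- out ~= in -/
 *
 * (Nested comment delimiters above are written as /- -/ to keep this block a
 * valid C comment.)
 */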
66 
int
rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint8_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint8_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT8_MAX)
			i32 = UINT8_MAX;

		*output_buffer = (uint8_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
98 
int
rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint8_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint8_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
121 
int
rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < INT16_MIN)
			i32 = INT16_MIN;

		if (i32 > INT16_MAX)
			i32 = INT16_MAX;

		*output_buffer = (int16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
153 
int
rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
176 
int
rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	int32_t i32;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		i32 = (int32_t)round((*input_buffer) * scale);

		if (i32 < 0)
			i32 = 0;

		if (i32 > UINT16_MAX)
			i32 = UINT16_MAX;

		*output_buffer = (uint16_t)i32;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
208 
int
rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
231 
int
rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	int32_t *output_buffer;
	int64_t i64;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (int32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		/* Round in 64-bit so that out-of-range values saturate instead of
		 * invoking undefined behaviour in the float-to-int conversion.
		 */
		i64 = (int64_t)round((*input_buffer) * scale);

		if (i64 < INT32_MIN)
			i64 = INT32_MIN;

		if (i64 > INT32_MAX)
			i64 = INT32_MAX;

		*output_buffer = (int32_t)i64;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
254 
int
rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	int32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (int32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
277 
int
rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint32_t *output_buffer;
	int64_t i64;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint32_t *)output;

	for (i = 0; i < nb_elements; i++) {
		/* Use a 64-bit intermediate so that the full uint32 range is
		 * reachable and values beyond it saturate instead of overflowing.
		 */
		i64 = (int64_t)round((*input_buffer) * scale);

		if (i64 < 0)
			i64 = 0;

		if (i64 > UINT32_MAX)
			i64 = UINT32_MAX;

		*output_buffer = (uint32_t)i64;

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
306 
int
rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
{
	uint32_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint32_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = scale * (float)(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
329 
/* Convert a single precision floating point number (float32) into a half precision
 * floating point number (float16) using the round to nearest, ties to even rounding mode.
 */
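
/* Worked example (the bit patterns follow from the IEEE 754 binary32/binary16
 * layouts assumed by the masks above): 1.5f is 0x3FC00000 (s = 0, e = 127,
 * m = 0x400000). The biased float16 exponent is 127 - 127 + 15 = 15 and the
 * mantissa truncates to 0x400000 >> 13 = 0x200 with no rounding needed,
 * packing to 0x3E00. A tie such as 1.00048828125f (truncated bits exactly
 * halfway) rounds to even, yielding 1.0f = 0x3C00.
 */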
static uint16_t
__float32_to_float16_scalar_rtn(float x)
{
	union float32 f32; /* float32 input */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t tbits;	   /* number of truncated bits */
	uint32_t tmsb;	   /* MSB position of truncated bits */
	uint32_t m_32;	   /* temporary float32 mantissa */
	uint16_t m_16;	   /* temporary float16 mantissa */
	uint16_t u16;	   /* float16 output */
	int be_16;	   /* float16 biased exponent, signed */

	f32.f = x;
	f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
	f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
	f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;

	f16_s = f32_s;
	f16_e = 0;
	f16_m = 0;

	switch (f32_e) {
	case (0): /* float32: zero or subnormal number */
		f16_e = 0;
		f16_m = 0; /* convert to zero */
		break;
	case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
		f16_e = FP16_MASK_E >> FP16_LSB_E;
		if (f32_m == 0) { /* infinity */
			f16_m = 0;
		} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
			f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M);
			f16_m |= BIT(FP16_MSB_M);
		}
		break;
	default: /* float32: normal number */
		/* compute biased exponent for float16 */
		be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E;

		/* overflow, be_16 = [31:INF], set to infinity */
		if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) {
			f16_e = FP16_MASK_E >> FP16_LSB_E;
			f16_m = 0;
		} else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) {
			/* normal float16, be_16 = [1:30] */
			f16_e = be_16;
			m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E);
			tmsb = FP32_MSB_M - FP16_MSB_M - 1;
			if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) {
				/* round up: truncated value is above halfway */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) {
				/* exactly halfway: round up only if LSB of m_16 is
				 * set, i.e. round to even
				 */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) {
			/* underflow: zero / subnormal, be_16 = [-9:0] */
			f16_e = 0;

			/* add the implicit leading one of the normal float32 */
			m_32 = f32_m | BIT(FP32_LSB_E);
			tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1;
			m_16 = m_32 >> tbits;

			if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
				/* round up: truncated value is above halfway */
				m_16++;

				/* overflow into exponent */
				if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
					f16_e++;
			} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
				/* exactly halfway: round to even */
				if ((m_16 & 0x1) == 0x1) {
					m_16++;

					/* overflow into exponent */
					if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1)
						f16_e++;
				}
			}
			f16_m = m_16 & FP16_MASK_M;
		} else if (be_16 == -(int)(FP16_MSB_M + 1)) {
			/* underflow, be_16 = [-10]: round up to the smallest
			 * subnormal when any mantissa bit is set, else to zero
			 */
			f16_e = 0;
			if (f32_m != 0)
				f16_m = 1;
			else
				f16_m = 0;
		} else {
			/* underflow: zero, be_16 = [-INF:-11] */
			f16_e = 0;
			f16_m = 0;
		}

		break;
	}

	u16 = FP16_PACK(f16_s, f16_e, f16_m);

	return u16;
}
451 
int
rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
{
	float *input_buffer;
	uint16_t *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (float *)input;
	output_buffer = (uint16_t *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float32_to_float16_scalar_rtn(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}
474 
/* Convert a half precision floating point number (float16) into a single precision
 * floating point number (float32).
 */
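
/* Worked example: the smallest float16 subnormal 0x0001 (e = 0, m = 1) has
 * clz = 9 leading zeroes within the 10-bit mantissa field, so e_16 = -9 and
 * the float32 exponent becomes 127 - 9 - 15 = 103; the mantissa shifts left
 * by 9 + 13 + 1 = 23 bits, leaving 0 after masking, i.e. exactly 2^-24.
 */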
static float
__float16_to_float32_scalar_rtx(uint16_t f16)
{
	union float32 f32; /* float32 output */
	uint16_t f16_s;	   /* float16 sign */
	uint16_t f16_e;	   /* float16 exponent */
	uint16_t f16_m;	   /* float16 mantissa */
	uint32_t f32_s;	   /* float32 sign */
	uint32_t f32_e;	   /* float32 exponent */
	uint32_t f32_m;	   /* float32 mantissa */
	uint8_t shift;	   /* number of bits to be shifted */
	uint32_t clz;	   /* count of leading zeroes */
	int e_16;	   /* float16 exponent unbiased */

	f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S;
	f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E;
	f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M;

	f32_s = f16_s;
	switch (f16_e) {
	case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */
		f32_e = FP32_MASK_E >> FP32_LSB_E;
		if (f16_m == 0x0) { /* infinity */
			f32_m = f16_m;
		} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
			f32_m = f16_m;
			shift = FP32_MSB_M - FP16_MSB_M;
			f32_m = (f32_m << shift) & FP32_MASK_M;
			f32_m |= BIT(FP32_MSB_M);
		}
		break;
	case 0: /* float16: zero or subnormal */
		f32_m = f16_m;
		if (f16_m == 0) { /* signed zero */
			f32_e = 0;
		} else { /* subnormal numbers */
			clz = rte_clz32((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E;
			e_16 = (int)f16_e - clz;
			f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

			shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1;
			f32_m = (f32_m << shift) & FP32_MASK_M;
		}
		break;
	default: /* normal numbers */
		f32_m = f16_m;
		e_16 = (int)f16_e;
		f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E;

		shift = (FP32_MSB_M - FP16_MSB_M);
		f32_m = (f32_m << shift) & FP32_MASK_M;
	}

	f32.u = FP32_PACK(f32_s, f32_e, f32_m);

	return f32.f;
}
535 
int
rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
{
	uint16_t *input_buffer;
	float *output_buffer;
	uint64_t i;

	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
		return -EINVAL;

	input_buffer = (uint16_t *)input;
	output_buffer = (float *)output;

	for (i = 0; i < nb_elements; i++) {
		*output_buffer = __float16_to_float32_scalar_rtx(*input_buffer);

		input_buffer++;
		output_buffer++;
	}

	return 0;
}