BLAKE3 AVX-512 implementation: the 4-, 8-, and 16-lane round functions (round_fn4, round_fn8, round_fn16) and excerpts from blake3_hash4_avx512, blake3_hash8_avx512, and blake3_hash16_avx512 where the state vector v is used.

/* round_fn4: one full BLAKE3 round over four message blocks in parallel.
 * Each __m128i holds the same 32-bit state word for four independent
 * lanes, so the eight g mixes of a round all run four-wide at once. */
INLINE void round_fn4(__m128i v[16], __m128i m[16], size_t r) {
  // Mix the columns.
  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
  v[0] = add_128(v[0], v[4]);
  v[1] = add_128(v[1], v[5]);
  v[2] = add_128(v[2], v[6]);
  v[3] = add_128(v[3], v[7]);
  v[12] = xor_128(v[12], v[0]);
  v[13] = xor_128(v[13], v[1]);
  v[14] = xor_128(v[14], v[2]);
  v[15] = xor_128(v[15], v[3]);
  v[12] = rot16_128(v[12]);
  v[13] = rot16_128(v[13]);
  v[14] = rot16_128(v[14]);
  v[15] = rot16_128(v[15]);
  v[8] = add_128(v[8], v[12]);
  v[9] = add_128(v[9], v[13]);
  v[10] = add_128(v[10], v[14]);
  v[11] = add_128(v[11], v[15]);
  v[4] = xor_128(v[4], v[8]);
  v[5] = xor_128(v[5], v[9]);
  v[6] = xor_128(v[6], v[10]);
  v[7] = xor_128(v[7], v[11]);
  v[4] = rot12_128(v[4]);
  v[5] = rot12_128(v[5]);
  v[6] = rot12_128(v[6]);
  v[7] = rot12_128(v[7]);
  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
  v[0] = add_128(v[0], v[4]);
  v[1] = add_128(v[1], v[5]);
  v[2] = add_128(v[2], v[6]);
  v[3] = add_128(v[3], v[7]);
  v[12] = xor_128(v[12], v[0]);
  v[13] = xor_128(v[13], v[1]);
  v[14] = xor_128(v[14], v[2]);
  v[15] = xor_128(v[15], v[3]);
  v[12] = rot8_128(v[12]);
  v[13] = rot8_128(v[13]);
  v[14] = rot8_128(v[14]);
  v[15] = rot8_128(v[15]);
  v[8] = add_128(v[8], v[12]);
  v[9] = add_128(v[9], v[13]);
  v[10] = add_128(v[10], v[14]);
  v[11] = add_128(v[11], v[15]);
  v[4] = xor_128(v[4], v[8]);
  v[5] = xor_128(v[5], v[9]);
  v[6] = xor_128(v[6], v[10]);
  v[7] = xor_128(v[7], v[11]);
  v[4] = rot7_128(v[4]);
  v[5] = rot7_128(v[5]);
  v[6] = rot7_128(v[6]);
  v[7] = rot7_128(v[7]);

  // Mix the diagonals.
  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
  v[0] = add_128(v[0], v[5]);
  v[1] = add_128(v[1], v[6]);
  v[2] = add_128(v[2], v[7]);
  v[3] = add_128(v[3], v[4]);
  v[15] = xor_128(v[15], v[0]);
  v[12] = xor_128(v[12], v[1]);
  v[13] = xor_128(v[13], v[2]);
  v[14] = xor_128(v[14], v[3]);
  v[15] = rot16_128(v[15]);
  v[12] = rot16_128(v[12]);
  v[13] = rot16_128(v[13]);
  v[14] = rot16_128(v[14]);
  v[10] = add_128(v[10], v[15]);
  v[11] = add_128(v[11], v[12]);
  v[8] = add_128(v[8], v[13]);
  v[9] = add_128(v[9], v[14]);
  v[5] = xor_128(v[5], v[10]);
  v[6] = xor_128(v[6], v[11]);
  v[7] = xor_128(v[7], v[8]);
  v[4] = xor_128(v[4], v[9]);
  v[5] = rot12_128(v[5]);
  v[6] = rot12_128(v[6]);
  v[7] = rot12_128(v[7]);
  v[4] = rot12_128(v[4]);
  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
  v[0] = add_128(v[0], v[5]);
  v[1] = add_128(v[1], v[6]);
  v[2] = add_128(v[2], v[7]);
  v[3] = add_128(v[3], v[4]);
  v[15] = xor_128(v[15], v[0]);
  v[12] = xor_128(v[12], v[1]);
  v[13] = xor_128(v[13], v[2]);
  v[14] = xor_128(v[14], v[3]);
  v[15] = rot8_128(v[15]);
  v[12] = rot8_128(v[12]);
  v[13] = rot8_128(v[13]);
  v[14] = rot8_128(v[14]);
  v[10] = add_128(v[10], v[15]);
  v[11] = add_128(v[11], v[12]);
  v[8] = add_128(v[8], v[13]);
  v[9] = add_128(v[9], v[14]);
  v[5] = xor_128(v[5], v[10]);
  v[6] = xor_128(v[6], v[11]);
  v[7] = xor_128(v[7], v[8]);
  v[4] = xor_128(v[4], v[9]);
  v[5] = rot7_128(v[5]);
  v[6] = rot7_128(v[6]);
  v[7] = rot7_128(v[7]);
  v[4] = rot7_128(v[4]);
}
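Each group of four statements above is the four-wide form of one step of BLAKE3's scalar g function: the column step runs g on the index tuples (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15), and the diagonal step on (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14). For reference, a scalar sketch of the same round in the style of the portable implementation (rotr32, g, and round_fn are illustrative names here; MSG_SCHEDULE is the same 7x16 table the vector code indexes, assumed declared elsewhere):

#include <stddef.h>
#include <stdint.h>

extern const uint8_t MSG_SCHEDULE[7][16]; /* provided elsewhere */

static inline uint32_t rotr32(uint32_t w, uint32_t c) {
  return (w >> c) | (w << (32 - c));
}

/* The quarter-round: mixes message words x and y into one column or
 * diagonal (a, b, c, d) of the 4x4 state, with the 16/12/8/7 rotations
 * seen in the vector code. */
static inline void g(uint32_t *state, size_t a, size_t b, size_t c,
                     size_t d, uint32_t x, uint32_t y) {
  state[a] = state[a] + state[b] + x;
  state[d] = rotr32(state[d] ^ state[a], 16);
  state[c] = state[c] + state[d];
  state[b] = rotr32(state[b] ^ state[c], 12);
  state[a] = state[a] + state[b] + y;
  state[d] = rotr32(state[d] ^ state[a], 8);
  state[c] = state[c] + state[d];
  state[b] = rotr32(state[b] ^ state[c], 7);
}

/* One scalar round: columns first, then diagonals, message words chosen
 * by MSG_SCHEDULE[round] exactly as in round_fn4 above. */
static inline void round_fn(uint32_t state[16], const uint32_t *msg,
                            size_t round) {
  const uint8_t *s = MSG_SCHEDULE[round];
  g(state, 0, 4, 8, 12, msg[s[0]], msg[s[1]]);
  g(state, 1, 5, 9, 13, msg[s[2]], msg[s[3]]);
  g(state, 2, 6, 10, 14, msg[s[4]], msg[s[5]]);
  g(state, 3, 7, 11, 15, msg[s[6]], msg[s[7]]);
  g(state, 0, 5, 10, 15, msg[s[8]], msg[s[9]]);
  g(state, 1, 6, 11, 12, msg[s[10]], msg[s[11]]);
  g(state, 2, 7, 8, 13, msg[s[12]], msg[s[13]]);
  g(state, 3, 4, 9, 14, msg[s[14]], msg[s[15]]);
}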
/* Excerpt from blake3_hash4_avx512 (the per-block compression loop).
 * The initializer lines for rows 2-3 of v did not contain the searched
 * identifier and were missing from this excerpt; the layout shown below
 * (chaining values, IV constants, counters, block length, flags) is the
 * standard BLAKE3 state layout, and the *_vec names are reconstructed. */
__m128i v[16] = {
    h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
    h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
    set1_128(IV[0]), set1_128(IV[1]),  set1_128(IV[2]), set1_128(IV[3]),
    counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
};
round_fn4(v, msg_vecs, 0);
round_fn4(v, msg_vecs, 1);
round_fn4(v, msg_vecs, 2);
round_fn4(v, msg_vecs, 3);
round_fn4(v, msg_vecs, 4);
round_fn4(v, msg_vecs, 5);
round_fn4(v, msg_vecs, 6);
h_vecs[0] = xor_128(v[0], v[8]);
h_vecs[1] = xor_128(v[1], v[9]);
h_vecs[2] = xor_128(v[2], v[10]);
h_vecs[3] = xor_128(v[3], v[11]);
h_vecs[4] = xor_128(v[4], v[12]);
h_vecs[5] = xor_128(v[5], v[13]);
h_vecs[6] = xor_128(v[6], v[14]);
h_vecs[7] = xor_128(v[7], v[15]);
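The add_128/xor_128/rot*_128 helpers are not part of this excerpt. A minimal sketch of what they can look like in an AVX-512 build, where AVX-512VL makes each rotation a single vprord instruction (an assumption about the helper definitions, consistent with the rotation counts used above):

#include <immintrin.h>

static inline __m128i add_128(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
static inline __m128i xor_128(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
/* Rotate each 32-bit lane right by a constant, via AVX-512VL vprord. */
static inline __m128i rot16_128(__m128i x) { return _mm_ror_epi32(x, 16); }
static inline __m128i rot12_128(__m128i x) { return _mm_ror_epi32(x, 12); }
static inline __m128i rot8_128(__m128i x)  { return _mm_ror_epi32(x, 8); }
static inline __m128i rot7_128(__m128i x)  { return _mm_ror_epi32(x, 7); }

The 256- and 512-bit variants follow the same pattern with _mm256_ror_epi32 and _mm512_ror_epi32. Without AVX-512VL, each rotation would instead need a pair of shifts and an or (or a byte shuffle for the 16- and 8-bit cases).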
/* round_fn8: the same round, eight lanes wide (__m256i). */
INLINE void round_fn8(__m256i v[16], __m256i m[16], size_t r) {
  // Mix the columns.
  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
  v[0] = add_256(v[0], v[4]);
  v[1] = add_256(v[1], v[5]);
  v[2] = add_256(v[2], v[6]);
  v[3] = add_256(v[3], v[7]);
  v[12] = xor_256(v[12], v[0]);
  v[13] = xor_256(v[13], v[1]);
  v[14] = xor_256(v[14], v[2]);
  v[15] = xor_256(v[15], v[3]);
  v[12] = rot16_256(v[12]);
  v[13] = rot16_256(v[13]);
  v[14] = rot16_256(v[14]);
  v[15] = rot16_256(v[15]);
  v[8] = add_256(v[8], v[12]);
  v[9] = add_256(v[9], v[13]);
  v[10] = add_256(v[10], v[14]);
  v[11] = add_256(v[11], v[15]);
  v[4] = xor_256(v[4], v[8]);
  v[5] = xor_256(v[5], v[9]);
  v[6] = xor_256(v[6], v[10]);
  v[7] = xor_256(v[7], v[11]);
  v[4] = rot12_256(v[4]);
  v[5] = rot12_256(v[5]);
  v[6] = rot12_256(v[6]);
  v[7] = rot12_256(v[7]);
  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
  v[0] = add_256(v[0], v[4]);
  v[1] = add_256(v[1], v[5]);
  v[2] = add_256(v[2], v[6]);
  v[3] = add_256(v[3], v[7]);
  v[12] = xor_256(v[12], v[0]);
  v[13] = xor_256(v[13], v[1]);
  v[14] = xor_256(v[14], v[2]);
  v[15] = xor_256(v[15], v[3]);
  v[12] = rot8_256(v[12]);
  v[13] = rot8_256(v[13]);
  v[14] = rot8_256(v[14]);
  v[15] = rot8_256(v[15]);
  v[8] = add_256(v[8], v[12]);
  v[9] = add_256(v[9], v[13]);
  v[10] = add_256(v[10], v[14]);
  v[11] = add_256(v[11], v[15]);
  v[4] = xor_256(v[4], v[8]);
  v[5] = xor_256(v[5], v[9]);
  v[6] = xor_256(v[6], v[10]);
  v[7] = xor_256(v[7], v[11]);
  v[4] = rot7_256(v[4]);
  v[5] = rot7_256(v[5]);
  v[6] = rot7_256(v[6]);
  v[7] = rot7_256(v[7]);

  // Mix the diagonals.
  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
  v[0] = add_256(v[0], v[5]);
  v[1] = add_256(v[1], v[6]);
  v[2] = add_256(v[2], v[7]);
  v[3] = add_256(v[3], v[4]);
  v[15] = xor_256(v[15], v[0]);
  v[12] = xor_256(v[12], v[1]);
  v[13] = xor_256(v[13], v[2]);
  v[14] = xor_256(v[14], v[3]);
  v[15] = rot16_256(v[15]);
  v[12] = rot16_256(v[12]);
  v[13] = rot16_256(v[13]);
  v[14] = rot16_256(v[14]);
  v[10] = add_256(v[10], v[15]);
  v[11] = add_256(v[11], v[12]);
  v[8] = add_256(v[8], v[13]);
  v[9] = add_256(v[9], v[14]);
  v[5] = xor_256(v[5], v[10]);
  v[6] = xor_256(v[6], v[11]);
  v[7] = xor_256(v[7], v[8]);
  v[4] = xor_256(v[4], v[9]);
  v[5] = rot12_256(v[5]);
  v[6] = rot12_256(v[6]);
  v[7] = rot12_256(v[7]);
  v[4] = rot12_256(v[4]);
  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
  v[0] = add_256(v[0], v[5]);
  v[1] = add_256(v[1], v[6]);
  v[2] = add_256(v[2], v[7]);
  v[3] = add_256(v[3], v[4]);
  v[15] = xor_256(v[15], v[0]);
  v[12] = xor_256(v[12], v[1]);
  v[13] = xor_256(v[13], v[2]);
  v[14] = xor_256(v[14], v[3]);
  v[15] = rot8_256(v[15]);
  v[12] = rot8_256(v[12]);
  v[13] = rot8_256(v[13]);
  v[14] = rot8_256(v[14]);
  v[10] = add_256(v[10], v[15]);
  v[11] = add_256(v[11], v[12]);
  v[8] = add_256(v[8], v[13]);
  v[9] = add_256(v[9], v[14]);
  v[5] = xor_256(v[5], v[10]);
  v[6] = xor_256(v[6], v[11]);
  v[7] = xor_256(v[7], v[8]);
  v[4] = xor_256(v[4], v[9]);
  v[5] = rot7_256(v[5]);
  v[6] = rot7_256(v[6]);
  v[7] = rot7_256(v[7]);
  v[4] = rot7_256(v[4]);
}
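All three round functions index MSG_SCHEDULE[r], a 7x16 table of message-word indices. BLAKE3 defines round 0 as the identity order and derives each later round by applying a fixed permutation to the previous row. A sketch that derives the table (build_msg_schedule is an illustrative name; the permutation constants are BLAKE3's published message permutation):

#include <stddef.h>
#include <stdint.h>

/* BLAKE3's fixed message permutation. */
static const uint8_t PERM[16] = {2, 6,  3,  10, 7, 0,  4,  13,
                                 1, 11, 12, 5,  9, 14, 15, 8};

/* Fill a 7x16 schedule: row 0 is the identity order, and each later
 * row applies PERM to the row before it. */
static void build_msg_schedule(uint8_t schedule[7][16]) {
  for (size_t i = 0; i < 16; i++) {
    schedule[0][i] = (uint8_t)i;
  }
  for (size_t r = 1; r < 7; r++) {
    for (size_t i = 0; i < 16; i++) {
      schedule[r][i] = schedule[r - 1][PERM[i]];
    }
  }
}

Precomputing the table this way lets every round read its word order with one lookup instead of permuting the sixteen message vectors between rounds.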
/* Excerpt from blake3_hash8_avx512: same structure as the 4-lane
 * version, with the search-elided rows of the initializer reconstructed
 * the same way. */
__m256i v[16] = {
    h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
    h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
    set1_256(IV[0]), set1_256(IV[1]),  set1_256(IV[2]), set1_256(IV[3]),
    counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
};
round_fn8(v, msg_vecs, 0);
round_fn8(v, msg_vecs, 1);
round_fn8(v, msg_vecs, 2);
round_fn8(v, msg_vecs, 3);
round_fn8(v, msg_vecs, 4);
round_fn8(v, msg_vecs, 5);
round_fn8(v, msg_vecs, 6);
h_vecs[0] = xor_256(v[0], v[8]);
h_vecs[1] = xor_256(v[1], v[9]);
h_vecs[2] = xor_256(v[2], v[10]);
h_vecs[3] = xor_256(v[3], v[11]);
h_vecs[4] = xor_256(v[4], v[12]);
h_vecs[5] = xor_256(v[5], v[13]);
h_vecs[6] = xor_256(v[6], v[14]);
h_vecs[7] = xor_256(v[7], v[15]);
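These hash functions keep their data transposed: msg_vecs[j] holds message word j from eight different input blocks, one per 32-bit lane, and h_vecs[i] holds chaining-value word i for all eight inputs. The real code builds these vectors with SIMD unpack/permute shuffles; the sketch below shows a slow but equivalent scalar gather to make the layout concrete (gather_word8 is an illustrative name, and inputs/block stand in for the surrounding function's arguments):

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Gather 32-bit word j of the current 64-byte block from each of the
 * eight inputs into one __m256i, lane l <- input l. Illustrative only:
 * the upstream code achieves this with vector transposes, not eight
 * scalar loads. */
static __m256i gather_word8(const uint8_t *const inputs[8], size_t block,
                            size_t j) {
  uint32_t lanes[8];
  for (size_t l = 0; l < 8; l++) {
    memcpy(&lanes[l], &inputs[l][block * 64 + j * 4], 4); /* LE load on x86 */
  }
  return _mm256_loadu_si256((const __m256i *)lanes);
}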
/* round_fn16: the same round, sixteen lanes wide (__m512i). */
INLINE void round_fn16(__m512i v[16], __m512i m[16], size_t r) {
  // Mix the columns.
  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
  v[0] = add_512(v[0], v[4]);
  v[1] = add_512(v[1], v[5]);
  v[2] = add_512(v[2], v[6]);
  v[3] = add_512(v[3], v[7]);
  v[12] = xor_512(v[12], v[0]);
  v[13] = xor_512(v[13], v[1]);
  v[14] = xor_512(v[14], v[2]);
  v[15] = xor_512(v[15], v[3]);
  v[12] = rot16_512(v[12]);
  v[13] = rot16_512(v[13]);
  v[14] = rot16_512(v[14]);
  v[15] = rot16_512(v[15]);
  v[8] = add_512(v[8], v[12]);
  v[9] = add_512(v[9], v[13]);
  v[10] = add_512(v[10], v[14]);
  v[11] = add_512(v[11], v[15]);
  v[4] = xor_512(v[4], v[8]);
  v[5] = xor_512(v[5], v[9]);
  v[6] = xor_512(v[6], v[10]);
  v[7] = xor_512(v[7], v[11]);
  v[4] = rot12_512(v[4]);
  v[5] = rot12_512(v[5]);
  v[6] = rot12_512(v[6]);
  v[7] = rot12_512(v[7]);
  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
  v[0] = add_512(v[0], v[4]);
  v[1] = add_512(v[1], v[5]);
  v[2] = add_512(v[2], v[6]);
  v[3] = add_512(v[3], v[7]);
  v[12] = xor_512(v[12], v[0]);
  v[13] = xor_512(v[13], v[1]);
  v[14] = xor_512(v[14], v[2]);
  v[15] = xor_512(v[15], v[3]);
  v[12] = rot8_512(v[12]);
  v[13] = rot8_512(v[13]);
  v[14] = rot8_512(v[14]);
  v[15] = rot8_512(v[15]);
  v[8] = add_512(v[8], v[12]);
  v[9] = add_512(v[9], v[13]);
  v[10] = add_512(v[10], v[14]);
  v[11] = add_512(v[11], v[15]);
  v[4] = xor_512(v[4], v[8]);
  v[5] = xor_512(v[5], v[9]);
  v[6] = xor_512(v[6], v[10]);
  v[7] = xor_512(v[7], v[11]);
  v[4] = rot7_512(v[4]);
  v[5] = rot7_512(v[5]);
  v[6] = rot7_512(v[6]);
  v[7] = rot7_512(v[7]);

  // Mix the diagonals.
  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
  v[0] = add_512(v[0], v[5]);
  v[1] = add_512(v[1], v[6]);
  v[2] = add_512(v[2], v[7]);
  v[3] = add_512(v[3], v[4]);
  v[15] = xor_512(v[15], v[0]);
  v[12] = xor_512(v[12], v[1]);
  v[13] = xor_512(v[13], v[2]);
  v[14] = xor_512(v[14], v[3]);
  v[15] = rot16_512(v[15]);
  v[12] = rot16_512(v[12]);
  v[13] = rot16_512(v[13]);
  v[14] = rot16_512(v[14]);
  v[10] = add_512(v[10], v[15]);
  v[11] = add_512(v[11], v[12]);
  v[8] = add_512(v[8], v[13]);
  v[9] = add_512(v[9], v[14]);
  v[5] = xor_512(v[5], v[10]);
  v[6] = xor_512(v[6], v[11]);
  v[7] = xor_512(v[7], v[8]);
  v[4] = xor_512(v[4], v[9]);
  v[5] = rot12_512(v[5]);
  v[6] = rot12_512(v[6]);
  v[7] = rot12_512(v[7]);
  v[4] = rot12_512(v[4]);
  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
  v[0] = add_512(v[0], v[5]);
  v[1] = add_512(v[1], v[6]);
  v[2] = add_512(v[2], v[7]);
  v[3] = add_512(v[3], v[4]);
  v[15] = xor_512(v[15], v[0]);
  v[12] = xor_512(v[12], v[1]);
  v[13] = xor_512(v[13], v[2]);
  v[14] = xor_512(v[14], v[3]);
  v[15] = rot8_512(v[15]);
  v[12] = rot8_512(v[12]);
  v[13] = rot8_512(v[13]);
  v[14] = rot8_512(v[14]);
  v[10] = add_512(v[10], v[15]);
  v[11] = add_512(v[11], v[12]);
  v[8] = add_512(v[8], v[13]);
  v[9] = add_512(v[9], v[14]);
  v[5] = xor_512(v[5], v[10]);
  v[6] = xor_512(v[6], v[11]);
  v[7] = xor_512(v[7], v[8]);
  v[4] = xor_512(v[4], v[9]);
  v[5] = rot7_512(v[5]);
  v[6] = rot7_512(v[6]);
  v[7] = rot7_512(v[7]);
  v[4] = rot7_512(v[4]);
}
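After the seven rounds, each hash function xors the two halves of the state, as the excerpt below shows vector by vector. This is BLAKE3's compression feed-forward truncated to the first eight words: the new chaining-value word i for every lane is v[i] ^ v[i + 8]. A scalar sketch of the same finalization (feed_forward is an illustrative name):

#include <stddef.h>
#include <stdint.h>

/* Per-lane finalization: the compression output (next chaining value)
 * is the xor of the two halves of the 16-word state. */
static void feed_forward(uint32_t cv[8], const uint32_t v[16]) {
  for (size_t i = 0; i < 8; i++) {
    cv[i] = v[i] ^ v[i + 8];
  }
}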
/* Excerpt from blake3_hash16_avx512: identical structure at sixteen
 * lanes, again with the search-elided initializer rows reconstructed. */
__m512i v[16] = {
    h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
    h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
    set1_512(IV[0]), set1_512(IV[1]),  set1_512(IV[2]), set1_512(IV[3]),
    counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
};
round_fn16(v, msg_vecs, 0);
round_fn16(v, msg_vecs, 1);
round_fn16(v, msg_vecs, 2);
round_fn16(v, msg_vecs, 3);
round_fn16(v, msg_vecs, 4);
round_fn16(v, msg_vecs, 5);
round_fn16(v, msg_vecs, 6);
h_vecs[0] = xor_512(v[0], v[8]);
h_vecs[1] = xor_512(v[1], v[9]);
h_vecs[2] = xor_512(v[2], v[10]);
h_vecs[3] = xor_512(v[3], v[11]);
h_vecs[4] = xor_512(v[4], v[12]);
h_vecs[5] = xor_512(v[5], v[13]);
h_vecs[6] = xor_512(v[6], v[14]);
h_vecs[7] = xor_512(v[7], v[15]);
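The three widths exist so a caller can batch as many independent chunks per pass as the hardware allows. A hedged sketch of the dispatch pattern (the names and the simplified signature here are hypothetical; the upstream AVX-512 backend's hash-many entry point follows the same greedy widest-first idea with a fuller argument list):

#include <stddef.h>
#include <stdint.h>

typedef void (*batch_fn)(const uint8_t *const *inputs, size_t num_inputs);

/* Greedily consume inputs at the widest vector width available, then
 * fall through to narrower widths, and finally a one-at-a-time path
 * for any remainder. */
static void hash_many_dispatch(const uint8_t *const *inputs, size_t n,
                               batch_fn hash16, batch_fn hash8,
                               batch_fn hash4, batch_fn hash1) {
  while (n >= 16) { hash16(inputs, 16); inputs += 16; n -= 16; }
  while (n >= 8)  { hash8(inputs, 8);   inputs += 8;  n -= 8;  }
  while (n >= 4)  { hash4(inputs, 4);   inputs += 4;  n -= 4;  }
  while (n >= 1)  { hash1(inputs, 1);   inputs += 1;  n -= 1;  }
}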