xref: /isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c (revision 9ab5a9e579c4fb4e2a3c92d73ccd6d97291d0e80)
1 #include "ec_base_vsx.h"
2 
3 void
4 gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
5                  unsigned char **dest)
6 {
7         unsigned char *s, *t0, *t1;
8         vector unsigned char vX1, vX2, vX3, vX4;
9         vector unsigned char vY1, vY2, vY3, vY4;
10         vector unsigned char vYD, vYE, vYF, vYG;
11         vector unsigned char vhi0, vlo0, vhi1, vlo1;
12         int i, head;
13 
14         s = (unsigned char *) src;
15         t0 = (unsigned char *) dest[0];
16         t1 = (unsigned char *) dest[1];
17 
18         head = len % 64;
19         if (head != 0) {
20                 gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
21                 gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
22         }
23 
24         vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
25         vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
26         vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
27         vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
28 
29         for (i = head; i < len - 63; i += 64) {
30                 vX1 = vec_xl(0, s + i);
31                 vX2 = vec_xl(16, s + i);
32                 vX3 = vec_xl(32, s + i);
33                 vX4 = vec_xl(48, s + i);
34 
35                 vY1 = vec_xl(0, t0 + i);
36                 vY2 = vec_xl(16, t0 + i);
37                 vYD = vec_xl(32, t0 + i);
38                 vYE = vec_xl(48, t0 + i);
39 
40                 vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
41                 vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
42                 vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
43                 vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
44 
45                 vY3 = vec_xl(0, t1 + i);
46                 vY4 = vec_xl(16, t1 + i);
47                 vYF = vec_xl(32, t1 + i);
48                 vYG = vec_xl(48, t1 + i);
49 
50                 vec_xst(vY1, 0, t0 + i);
51                 vec_xst(vY2, 16, t0 + i);
52                 vec_xst(vYD, 32, t0 + i);
53                 vec_xst(vYE, 48, t0 + i);
54 
55                 vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
56                 vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
57                 vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
58                 vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
59 
60                 vec_xst(vY3, 0, t1 + i);
61                 vec_xst(vY4, 16, t1 + i);
62                 vec_xst(vYF, 32, t1 + i);
63                 vec_xst(vYG, 48, t1 + i);
64         }
65         return;
66 }
67