/* $NetBSD: chacha_sse2_impl.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $ */

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

29*ba0c8ad5Sriastradh #include <sys/cdefs.h>
30*ba0c8ad5Sriastradh __KERNEL_RCSID(1, "$NetBSD: chacha_sse2_impl.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $");
31*ba0c8ad5Sriastradh
32*ba0c8ad5Sriastradh #include "chacha_sse2.h"
33*ba0c8ad5Sriastradh
34*ba0c8ad5Sriastradh #ifdef _KERNEL
35*ba0c8ad5Sriastradh #include <x86/cpu.h>
36*ba0c8ad5Sriastradh #include <x86/fpu.h>
37*ba0c8ad5Sriastradh #else
38*ba0c8ad5Sriastradh #include <sys/sysctl.h>
39*ba0c8ad5Sriastradh #include <cpuid.h>
40*ba0c8ad5Sriastradh #include <stddef.h>
41*ba0c8ad5Sriastradh #define fpu_kern_enter() ((void)0)
42*ba0c8ad5Sriastradh #define fpu_kern_leave() ((void)0)
43*ba0c8ad5Sriastradh #endif
44*ba0c8ad5Sriastradh
/*
 * chacha_core_sse2_impl(out, in, k, c, nr)
 *
 *	Call chacha_core_sse2 with the arguments forwarded unchanged,
 *	bracketed by fpu_kern_enter() and fpu_kern_leave().  In
 *	non-_KERNEL builds of this file the bracket expands to no-ops.
 *
 *	out must point to at least 64 bytes and is restrict-qualified;
 *	in and c must each point to at least 16 bytes, and k to at
 *	least 32 bytes.  nr is presumably the round count -- confirm
 *	against chacha_sse2.h.
 */
static void
chacha_core_sse2_impl(uint8_t out[restrict static 64],
    const uint8_t in[static 16],
    const uint8_t k[static 32],
    const uint8_t c[static 16],
    unsigned nr)
{

	fpu_kern_enter();
	chacha_core_sse2(out, in, k, c, nr);
	fpu_kern_leave();
}

/*
 * hchacha_sse2_impl(out, in, k, c, nr)
 *
 *	Call hchacha_sse2 with the arguments forwarded unchanged,
 *	bracketed by fpu_kern_enter() and fpu_kern_leave().  In
 *	non-_KERNEL builds of this file the bracket expands to no-ops.
 *
 *	out must point to at least 32 bytes and is restrict-qualified;
 *	in and c must each point to at least 16 bytes, and k to at
 *	least 32 bytes.  nr is presumably the round count -- confirm
 *	against chacha_sse2.h.
 */
static void
hchacha_sse2_impl(uint8_t out[restrict static 32],
    const uint8_t in[static 16],
    const uint8_t k[static 32],
    const uint8_t c[static 16],
    unsigned nr)
{

	fpu_kern_enter();
	hchacha_sse2(out, in, k, c, nr);
	fpu_kern_leave();
}

/*
 * chacha_stream_sse2_impl(s, nbytes, blkno, nonce, key, nr)
 *
 *	Call chacha_stream_sse2 with the arguments forwarded unchanged,
 *	bracketed by fpu_kern_enter() and fpu_kern_leave().  In
 *	non-_KERNEL builds of this file the bracket expands to no-ops.
 *
 *	s is restrict-qualified; nonce must point to at least 12 bytes
 *	and key to at least 32 bytes.  nbytes, blkno and nr are passed
 *	through as given; their exact meaning is defined by
 *	chacha_stream_sse2 (see chacha_sse2.h).
 */
static void
chacha_stream_sse2_impl(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t key[static 32],
    unsigned nr)
{

	fpu_kern_enter();
	chacha_stream_sse2(s, nbytes, blkno, nonce, key, nr);
	fpu_kern_leave();
}

/*
 * chacha_stream_xor_sse2_impl(c, p, nbytes, blkno, nonce, key, nr)
 *
 *	Call chacha_stream_xor_sse2 with the arguments forwarded
 *	unchanged, bracketed by fpu_kern_enter() and fpu_kern_leave().
 *	In non-_KERNEL builds of this file the bracket expands to
 *	no-ops.
 *
 *	Neither c nor p is restrict-qualified.  nonce must point to at
 *	least 12 bytes and key to at least 32 bytes.  nbytes, blkno and
 *	nr are passed through as given; their exact meaning is defined
 *	by chacha_stream_xor_sse2 (see chacha_sse2.h).
 */
static void
chacha_stream_xor_sse2_impl(uint8_t *c, const uint8_t *p, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t key[static 32],
    unsigned nr)
{

	fpu_kern_enter();
	chacha_stream_xor_sse2(c, p, nbytes, blkno, nonce, key, nr);
	fpu_kern_leave();
}

/*
 * xchacha_stream_sse2_impl(s, nbytes, blkno, nonce, key, nr)
 *
 *	Call xchacha_stream_sse2 with the arguments forwarded
 *	unchanged, bracketed by fpu_kern_enter() and fpu_kern_leave().
 *	In non-_KERNEL builds of this file the bracket expands to
 *	no-ops.
 *
 *	s is restrict-qualified; nonce must point to at least 24 bytes
 *	and key to at least 32 bytes.  nbytes, blkno and nr are passed
 *	through as given; their exact meaning is defined by
 *	xchacha_stream_sse2 (see chacha_sse2.h).
 */
static void
xchacha_stream_sse2_impl(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
    const uint8_t nonce[static 24],
    const uint8_t key[static 32],
    unsigned nr)
{

	fpu_kern_enter();
	xchacha_stream_sse2(s, nbytes, blkno, nonce, key, nr);
	fpu_kern_leave();
}

/*
 * xchacha_stream_xor_sse2_impl(c, p, nbytes, blkno, nonce, key, nr)
 *
 *	Call xchacha_stream_xor_sse2 with the arguments forwarded
 *	unchanged, bracketed by fpu_kern_enter() and fpu_kern_leave().
 *	In non-_KERNEL builds of this file the bracket expands to
 *	no-ops.
 *
 *	Neither c nor p is restrict-qualified.  nonce must point to at
 *	least 24 bytes and key to at least 32 bytes.  nbytes, blkno and
 *	nr are passed through as given; their exact meaning is defined
 *	by xchacha_stream_xor_sse2 (see chacha_sse2.h).
 */
static void
xchacha_stream_xor_sse2_impl(uint8_t *c, const uint8_t *p, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 24],
    const uint8_t key[static 32],
    unsigned nr)
{

	fpu_kern_enter();
	xchacha_stream_xor_sse2(c, p, nbytes, blkno, nonce, key, nr);
	fpu_kern_leave();
}

/*
 * chacha_probe_sse2()
 *
 *	Report whether the CPU offers both SSE and SSE2.  Returns 0 if
 *	both are present, -1 otherwise.
 *
 *	In the kernel the answer comes from the i386_has_sse and
 *	i386_has_sse2 flags.  Elsewhere it comes from the EDX feature
 *	word of CPUID leaf 1; a failing __get_cpuid (leaf not
 *	available) is treated as "not supported".
 */
static int
chacha_probe_sse2(void)
{

	/* Verify that the CPU supports SSE and SSE2. */
#ifdef _KERNEL
	if (!i386_has_sse)
		return -1;
	if (!i386_has_sse2)
		return -1;
#else
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return -1;
	if ((edx & bit_SSE) == 0)
		return -1;
	if ((edx & bit_SSE2) == 0)
		return -1;
#endif

	return 0;
}

144*ba0c8ad5Sriastradh const struct chacha_impl chacha_sse2_impl = {
145*ba0c8ad5Sriastradh .ci_name = "x86 SSE2 ChaCha",
146*ba0c8ad5Sriastradh .ci_probe = chacha_probe_sse2,
147*ba0c8ad5Sriastradh .ci_chacha_core = chacha_core_sse2_impl,
148*ba0c8ad5Sriastradh .ci_hchacha = hchacha_sse2_impl,
149*ba0c8ad5Sriastradh .ci_chacha_stream = chacha_stream_sse2_impl,
150*ba0c8ad5Sriastradh .ci_chacha_stream_xor = chacha_stream_xor_sse2_impl,
151*ba0c8ad5Sriastradh .ci_xchacha_stream = xchacha_stream_sse2_impl,
152*ba0c8ad5Sriastradh .ci_xchacha_stream_xor = xchacha_stream_xor_sse2_impl,
153*ba0c8ad5Sriastradh };
154