xref: /openbsd-src/lib/libcrypto/rc4/rc4.c (revision 902bfdca5279bfeffc5b4c006292470a4b73c32a)
1*902bfdcaSjsing /* $OpenBSD: rc4.c,v 1.13 2025/01/27 14:02:32 jsing Exp $ */
2578c3ea3Sjsing /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3578c3ea3Sjsing  * All rights reserved.
4578c3ea3Sjsing  *
5578c3ea3Sjsing  * This package is an SSL implementation written
6578c3ea3Sjsing  * by Eric Young (eay@cryptsoft.com).
7578c3ea3Sjsing  * The implementation was written so as to conform with Netscapes SSL.
8578c3ea3Sjsing  *
9578c3ea3Sjsing  * This library is free for commercial and non-commercial use as long as
10578c3ea3Sjsing  * the following conditions are aheared to.  The following conditions
11578c3ea3Sjsing  * apply to all code found in this distribution, be it the RC4, RSA,
12578c3ea3Sjsing  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13578c3ea3Sjsing  * included with this distribution is covered by the same copyright terms
14578c3ea3Sjsing  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15578c3ea3Sjsing  *
16578c3ea3Sjsing  * Copyright remains Eric Young's, and as such any Copyright notices in
17578c3ea3Sjsing  * the code are not to be removed.
18578c3ea3Sjsing  * If this package is used in a product, Eric Young should be given attribution
19578c3ea3Sjsing  * as the author of the parts of the library used.
20578c3ea3Sjsing  * This can be in the form of a textual message at program startup or
21578c3ea3Sjsing  * in documentation (online or textual) provided with the package.
22578c3ea3Sjsing  *
23578c3ea3Sjsing  * Redistribution and use in source and binary forms, with or without
24578c3ea3Sjsing  * modification, are permitted provided that the following conditions
25578c3ea3Sjsing  * are met:
26578c3ea3Sjsing  * 1. Redistributions of source code must retain the copyright
27578c3ea3Sjsing  *    notice, this list of conditions and the following disclaimer.
28578c3ea3Sjsing  * 2. Redistributions in binary form must reproduce the above copyright
29578c3ea3Sjsing  *    notice, this list of conditions and the following disclaimer in the
30578c3ea3Sjsing  *    documentation and/or other materials provided with the distribution.
31578c3ea3Sjsing  * 3. All advertising materials mentioning features or use of this software
32578c3ea3Sjsing  *    must display the following acknowledgement:
33578c3ea3Sjsing  *    "This product includes cryptographic software written by
34578c3ea3Sjsing  *     Eric Young (eay@cryptsoft.com)"
35578c3ea3Sjsing  *    The word 'cryptographic' can be left out if the rouines from the library
36578c3ea3Sjsing  *    being used are not cryptographic related :-).
37578c3ea3Sjsing  * 4. If you include any Windows specific code (or a derivative thereof) from
38578c3ea3Sjsing  *    the apps directory (application code) you must include an acknowledgement:
39578c3ea3Sjsing  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40578c3ea3Sjsing  *
41578c3ea3Sjsing  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42578c3ea3Sjsing  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43578c3ea3Sjsing  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44578c3ea3Sjsing  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45578c3ea3Sjsing  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46578c3ea3Sjsing  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47578c3ea3Sjsing  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48578c3ea3Sjsing  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49578c3ea3Sjsing  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50578c3ea3Sjsing  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51578c3ea3Sjsing  * SUCH DAMAGE.
52578c3ea3Sjsing  *
53578c3ea3Sjsing  * The licence and distribution terms for any publically available version or
54578c3ea3Sjsing  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55578c3ea3Sjsing  * copied and put under another distribution licence
56578c3ea3Sjsing  * [including the GNU Public Licence.]
57578c3ea3Sjsing  */
58578c3ea3Sjsing 
59578c3ea3Sjsing #include <endian.h>
60578c3ea3Sjsing 
61578c3ea3Sjsing #include <openssl/rc4.h>
62578c3ea3Sjsing 
6309b34817Sjsing #include "crypto_arch.h"
6409b34817Sjsing 
65578c3ea3Sjsing /* RC4 as implemented from a posting from
66578c3ea3Sjsing  * Newsgroups: sci.crypt
67578c3ea3Sjsing  * From: sterndark@netcom.com (David Sterndark)
68578c3ea3Sjsing  * Subject: RC4 Algorithm revealed.
69578c3ea3Sjsing  * Message-ID: <sternCvKL4B.Hyy@netcom.com>
70578c3ea3Sjsing  * Date: Wed, 14 Sep 1994 06:35:31 GMT
71578c3ea3Sjsing  */
72578c3ea3Sjsing 
73a0d14d73Sjsing #ifdef HAVE_RC4_INTERNAL
74a0d14d73Sjsing void rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata,
75a0d14d73Sjsing     unsigned char *outdata);
76a0d14d73Sjsing 
77a0d14d73Sjsing #else
78a0d14d73Sjsing static void
79a0d14d73Sjsing rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata,
80578c3ea3Sjsing     unsigned char *outdata)
81578c3ea3Sjsing {
82578c3ea3Sjsing 	RC4_INT *d;
83578c3ea3Sjsing 	RC4_INT x, y,tx, ty;
84578c3ea3Sjsing 	size_t i;
85578c3ea3Sjsing 
86578c3ea3Sjsing 	x = key->x;
87578c3ea3Sjsing 	y = key->y;
88578c3ea3Sjsing 	d = key->data;
89578c3ea3Sjsing 
90578c3ea3Sjsing #if defined(RC4_CHUNK)
91578c3ea3Sjsing 	/*
92578c3ea3Sjsing 	 * The original reason for implementing this(*) was the fact that
93578c3ea3Sjsing 	 * pre-21164a Alpha CPUs don't have byte load/store instructions
94578c3ea3Sjsing 	 * and e.g. a byte store has to be done with 64-bit load, shift,
95578c3ea3Sjsing 	 * and, or and finally 64-bit store. Peaking data and operating
96578c3ea3Sjsing 	 * at natural word size made it possible to reduce amount of
97578c3ea3Sjsing 	 * instructions as well as to perform early read-ahead without
98578c3ea3Sjsing 	 * suffering from RAW (read-after-write) hazard. This resulted
99578c3ea3Sjsing 	 * in ~40%(**) performance improvement on 21064 box with gcc.
100578c3ea3Sjsing 	 * But it's not only Alpha users who win here:-) Thanks to the
101578c3ea3Sjsing 	 * early-n-wide read-ahead this implementation also exhibits
102578c3ea3Sjsing 	 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
103578c3ea3Sjsing 	 * on sizeof(RC4_INT)).
104578c3ea3Sjsing 	 *
105578c3ea3Sjsing 	 * (*)	"this" means code which recognizes the case when input
106578c3ea3Sjsing 	 *	and output pointers appear to be aligned at natural CPU
107578c3ea3Sjsing 	 *	word boundary
108578c3ea3Sjsing 	 * (**)	i.e. according to 'apps/openssl speed rc4' benchmark,
109578c3ea3Sjsing 	 *	crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
110578c3ea3Sjsing 	 *
111578c3ea3Sjsing 	 * Caveats.
112578c3ea3Sjsing 	 *
113578c3ea3Sjsing 	 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
114578c3ea3Sjsing 	 *   UltraSPARC. Unfortunately gcc generates very slow code
115578c3ea3Sjsing 	 *   (2.5-3 times slower than one generated by Sun's WorkShop
116578c3ea3Sjsing 	 *   C) and therefore gcc (at least 2.95 and earlier) should
117578c3ea3Sjsing 	 *   always be told that RC4_CHUNK="unsigned long".
118578c3ea3Sjsing 	 *
119578c3ea3Sjsing 	 *					<appro@fy.chalmers.se>
120578c3ea3Sjsing 	 */
121578c3ea3Sjsing 
122578c3ea3Sjsing # define RC4_STEP	( \
123578c3ea3Sjsing 			x=(x+1) &0xff,	\
124578c3ea3Sjsing 			tx=d[x],	\
125578c3ea3Sjsing 			y=(tx+y)&0xff,	\
126578c3ea3Sjsing 			ty=d[y],	\
127578c3ea3Sjsing 			d[y]=tx,	\
128578c3ea3Sjsing 			d[x]=ty,	\
129578c3ea3Sjsing 			(RC4_CHUNK)d[(tx+ty)&0xff]\
130578c3ea3Sjsing 			)
131578c3ea3Sjsing 
132578c3ea3Sjsing 	if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) |
133578c3ea3Sjsing 	    ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0 ) {
134578c3ea3Sjsing 		RC4_CHUNK ichunk, otp;
135578c3ea3Sjsing 
136578c3ea3Sjsing 		/*
137578c3ea3Sjsing 		 * I reckon we can afford to implement both endian
138578c3ea3Sjsing 		 * cases and to decide which way to take at run-time
139578c3ea3Sjsing 		 * because the machine code appears to be very compact
140578c3ea3Sjsing 		 * and redundant 1-2KB is perfectly tolerable (i.e.
141578c3ea3Sjsing 		 * in case the compiler fails to eliminate it:-). By
142578c3ea3Sjsing 		 * suggestion from Terrel Larson <terr@terralogic.net>.
143578c3ea3Sjsing 		 *
144578c3ea3Sjsing 		 * Special notes.
145578c3ea3Sjsing 		 *
146578c3ea3Sjsing 		 * - compilers (those I've tried) don't seem to have
147578c3ea3Sjsing 		 *   problems eliminating either the operators guarded
148578c3ea3Sjsing 		 *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
149578c3ea3Sjsing 		 *   expressions themselves so I've got 'em to replace
150578c3ea3Sjsing 		 *   corresponding #ifdefs from the previous version;
151578c3ea3Sjsing 		 * - I chose to let the redundant switch cases when
152578c3ea3Sjsing 		 *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
153578c3ea3Sjsing 		 *   before);
154578c3ea3Sjsing 		 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
155578c3ea3Sjsing 		 *   [LB]ESHFT guards against "shift is out of range"
156578c3ea3Sjsing 		 *   warnings when sizeof(RC4_CHUNK)!=8
157578c3ea3Sjsing 		 *
158578c3ea3Sjsing 		 *			<appro@fy.chalmers.se>
159578c3ea3Sjsing 		 */
160578c3ea3Sjsing #if BYTE_ORDER == BIG_ENDIAN
161578c3ea3Sjsing # define BESHFT(c)	(((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
162578c3ea3Sjsing 		for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
163578c3ea3Sjsing 			ichunk  = *(RC4_CHUNK *)indata;
164578c3ea3Sjsing 			otp = RC4_STEP << BESHFT(0);
165578c3ea3Sjsing 			otp |= RC4_STEP << BESHFT(1);
166578c3ea3Sjsing 			otp |= RC4_STEP << BESHFT(2);
167578c3ea3Sjsing 			otp |= RC4_STEP << BESHFT(3);
168578c3ea3Sjsing 			if (sizeof(RC4_CHUNK) == 8) {
169578c3ea3Sjsing 				otp |= RC4_STEP << BESHFT(4);
170578c3ea3Sjsing 				otp |= RC4_STEP << BESHFT(5);
171578c3ea3Sjsing 				otp |= RC4_STEP << BESHFT(6);
172578c3ea3Sjsing 				otp |= RC4_STEP << BESHFT(7);
173578c3ea3Sjsing 			}
174578c3ea3Sjsing 			*(RC4_CHUNK *)outdata = otp^ichunk;
175578c3ea3Sjsing 			indata += sizeof(RC4_CHUNK);
176578c3ea3Sjsing 			outdata += sizeof(RC4_CHUNK);
177578c3ea3Sjsing 		}
178578c3ea3Sjsing #else
179578c3ea3Sjsing # define LESHFT(c)	(((c)*8)&(sizeof(RC4_CHUNK)*8-1))
180578c3ea3Sjsing 		for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
181578c3ea3Sjsing 			ichunk = *(RC4_CHUNK *)indata;
182578c3ea3Sjsing 			otp = RC4_STEP;
183578c3ea3Sjsing 			otp |= RC4_STEP << 8;
184578c3ea3Sjsing 			otp |= RC4_STEP << 16;
185578c3ea3Sjsing 			otp |= RC4_STEP << 24;
186578c3ea3Sjsing 			if (sizeof(RC4_CHUNK) == 8) {
187578c3ea3Sjsing 				otp |= RC4_STEP << LESHFT(4);
188578c3ea3Sjsing 				otp |= RC4_STEP << LESHFT(5);
189578c3ea3Sjsing 				otp |= RC4_STEP << LESHFT(6);
190578c3ea3Sjsing 				otp |= RC4_STEP << LESHFT(7);
191578c3ea3Sjsing 			}
192578c3ea3Sjsing 			*(RC4_CHUNK *)outdata = otp ^ ichunk;
193578c3ea3Sjsing 			indata += sizeof(RC4_CHUNK);
194578c3ea3Sjsing 			outdata += sizeof(RC4_CHUNK);
195578c3ea3Sjsing 		}
196578c3ea3Sjsing #endif
197578c3ea3Sjsing 	}
198578c3ea3Sjsing #endif
199*902bfdcaSjsing #define RC4_LOOP(in,out) \
200578c3ea3Sjsing 		x=((x+1)&0xff); \
201578c3ea3Sjsing 		tx=d[x]; \
202578c3ea3Sjsing 		y=(tx+y)&0xff; \
203578c3ea3Sjsing 		d[x]=ty=d[y]; \
204578c3ea3Sjsing 		d[y]=tx; \
205578c3ea3Sjsing 		(out) = d[(tx+ty)&0xff]^ (in);
206578c3ea3Sjsing 
207578c3ea3Sjsing 	i = len >> 3;
208578c3ea3Sjsing 	if (i) {
209578c3ea3Sjsing 		for (;;) {
210*902bfdcaSjsing 			RC4_LOOP(indata[0], outdata[0]);
211*902bfdcaSjsing 			RC4_LOOP(indata[1], outdata[1]);
212*902bfdcaSjsing 			RC4_LOOP(indata[2], outdata[2]);
213*902bfdcaSjsing 			RC4_LOOP(indata[3], outdata[3]);
214*902bfdcaSjsing 			RC4_LOOP(indata[4], outdata[4]);
215*902bfdcaSjsing 			RC4_LOOP(indata[5], outdata[5]);
216*902bfdcaSjsing 			RC4_LOOP(indata[6], outdata[6]);
217*902bfdcaSjsing 			RC4_LOOP(indata[7], outdata[7]);
218*902bfdcaSjsing 
219578c3ea3Sjsing 			indata += 8;
220578c3ea3Sjsing 			outdata += 8;
221*902bfdcaSjsing 
222578c3ea3Sjsing 			if (--i == 0)
223578c3ea3Sjsing 				break;
224578c3ea3Sjsing 		}
225578c3ea3Sjsing 	}
226578c3ea3Sjsing 	i = len&0x07;
227578c3ea3Sjsing 	if (i) {
228578c3ea3Sjsing 		for (;;) {
229*902bfdcaSjsing 			RC4_LOOP(indata[0], outdata[0]);
230578c3ea3Sjsing 			if (--i == 0)
231578c3ea3Sjsing 				break;
232*902bfdcaSjsing 			RC4_LOOP(indata[1], outdata[1]);
233578c3ea3Sjsing 			if (--i == 0)
234578c3ea3Sjsing 				break;
235*902bfdcaSjsing 			RC4_LOOP(indata[2], outdata[2]);
236578c3ea3Sjsing 			if (--i == 0)
237578c3ea3Sjsing 				break;
238*902bfdcaSjsing 			RC4_LOOP(indata[3], outdata[3]);
239578c3ea3Sjsing 			if (--i == 0)
240578c3ea3Sjsing 				break;
241*902bfdcaSjsing 			RC4_LOOP(indata[4], outdata[4]);
242578c3ea3Sjsing 			if (--i == 0)
243578c3ea3Sjsing 				break;
244*902bfdcaSjsing 			RC4_LOOP(indata[5], outdata[5]);
245578c3ea3Sjsing 			if (--i == 0)
246578c3ea3Sjsing 				break;
247*902bfdcaSjsing 			RC4_LOOP(indata[6], outdata[6]);
248578c3ea3Sjsing 			if (--i == 0)
249578c3ea3Sjsing 				break;
250578c3ea3Sjsing 		}
251578c3ea3Sjsing 	}
252578c3ea3Sjsing 	key->x = x;
253578c3ea3Sjsing 	key->y = y;
254578c3ea3Sjsing }
255a0d14d73Sjsing #endif
256578c3ea3Sjsing 
257a0d14d73Sjsing #ifdef HAVE_RC4_SET_KEY_INTERNAL
258a0d14d73Sjsing void rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data);
259a0d14d73Sjsing 
260a0d14d73Sjsing #else
2613715bfbfSjsing static inline void
262a0d14d73Sjsing rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data)
263578c3ea3Sjsing {
264578c3ea3Sjsing 	RC4_INT tmp;
265578c3ea3Sjsing 	int id1, id2;
266578c3ea3Sjsing 	RC4_INT *d;
267578c3ea3Sjsing 	unsigned int i;
268578c3ea3Sjsing 
269578c3ea3Sjsing 	d = &(key->data[0]);
270578c3ea3Sjsing 	key->x = 0;
271578c3ea3Sjsing 	key->y = 0;
272578c3ea3Sjsing 	id1 = id2 = 0;
273578c3ea3Sjsing 
274578c3ea3Sjsing #define SK_LOOP(d,n) { \
275578c3ea3Sjsing 		tmp=d[(n)]; \
276578c3ea3Sjsing 		id2 = (data[id1] + tmp + id2) & 0xff; \
277578c3ea3Sjsing 		if (++id1 == len) id1=0; \
278578c3ea3Sjsing 		d[(n)]=d[id2]; \
279578c3ea3Sjsing 		d[id2]=tmp; }
280578c3ea3Sjsing 
281578c3ea3Sjsing 	for (i = 0; i < 256; i++)
282578c3ea3Sjsing 		d[i] = i;
283578c3ea3Sjsing 	for (i = 0; i < 256; i += 4) {
284578c3ea3Sjsing 		SK_LOOP(d, i + 0);
285578c3ea3Sjsing 		SK_LOOP(d, i + 1);
286578c3ea3Sjsing 		SK_LOOP(d, i + 2);
287578c3ea3Sjsing 		SK_LOOP(d, i + 3);
288578c3ea3Sjsing 	}
289578c3ea3Sjsing }
290a0d14d73Sjsing #endif
291a0d14d73Sjsing 
292a0d14d73Sjsing void
293a0d14d73Sjsing RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
294a0d14d73Sjsing     unsigned char *outdata)
295a0d14d73Sjsing {
296a0d14d73Sjsing 	rc4_internal(key, len, indata, outdata);
297a0d14d73Sjsing }
29842e47793Sjoshua LCRYPTO_ALIAS(RC4);
299a0d14d73Sjsing 
300a0d14d73Sjsing void
301a0d14d73Sjsing RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
302a0d14d73Sjsing {
303a0d14d73Sjsing 	rc4_set_key_internal(key, len, data);
304a0d14d73Sjsing }
30542e47793Sjoshua LCRYPTO_ALIAS(RC4_set_key);
306