1*902bfdcaSjsing /* $OpenBSD: rc4.c,v 1.13 2025/01/27 14:02:32 jsing Exp $ */ 2578c3ea3Sjsing /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3578c3ea3Sjsing * All rights reserved. 4578c3ea3Sjsing * 5578c3ea3Sjsing * This package is an SSL implementation written 6578c3ea3Sjsing * by Eric Young (eay@cryptsoft.com). 7578c3ea3Sjsing * The implementation was written so as to conform with Netscapes SSL. 8578c3ea3Sjsing * 9578c3ea3Sjsing * This library is free for commercial and non-commercial use as long as 10578c3ea3Sjsing * the following conditions are aheared to. The following conditions 11578c3ea3Sjsing * apply to all code found in this distribution, be it the RC4, RSA, 12578c3ea3Sjsing * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13578c3ea3Sjsing * included with this distribution is covered by the same copyright terms 14578c3ea3Sjsing * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15578c3ea3Sjsing * 16578c3ea3Sjsing * Copyright remains Eric Young's, and as such any Copyright notices in 17578c3ea3Sjsing * the code are not to be removed. 18578c3ea3Sjsing * If this package is used in a product, Eric Young should be given attribution 19578c3ea3Sjsing * as the author of the parts of the library used. 20578c3ea3Sjsing * This can be in the form of a textual message at program startup or 21578c3ea3Sjsing * in documentation (online or textual) provided with the package. 22578c3ea3Sjsing * 23578c3ea3Sjsing * Redistribution and use in source and binary forms, with or without 24578c3ea3Sjsing * modification, are permitted provided that the following conditions 25578c3ea3Sjsing * are met: 26578c3ea3Sjsing * 1. Redistributions of source code must retain the copyright 27578c3ea3Sjsing * notice, this list of conditions and the following disclaimer. 28578c3ea3Sjsing * 2. Redistributions in binary form must reproduce the above copyright 29578c3ea3Sjsing * notice, this list of conditions and the following disclaimer in the 30578c3ea3Sjsing * documentation and/or other materials provided with the distribution. 31578c3ea3Sjsing * 3. All advertising materials mentioning features or use of this software 32578c3ea3Sjsing * must display the following acknowledgement: 33578c3ea3Sjsing * "This product includes cryptographic software written by 34578c3ea3Sjsing * Eric Young (eay@cryptsoft.com)" 35578c3ea3Sjsing * The word 'cryptographic' can be left out if the rouines from the library 36578c3ea3Sjsing * being used are not cryptographic related :-). 37578c3ea3Sjsing * 4. If you include any Windows specific code (or a derivative thereof) from 38578c3ea3Sjsing * the apps directory (application code) you must include an acknowledgement: 39578c3ea3Sjsing * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40578c3ea3Sjsing * 41578c3ea3Sjsing * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42578c3ea3Sjsing * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43578c3ea3Sjsing * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44578c3ea3Sjsing * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45578c3ea3Sjsing * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46578c3ea3Sjsing * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47578c3ea3Sjsing * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48578c3ea3Sjsing * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49578c3ea3Sjsing * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50578c3ea3Sjsing * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51578c3ea3Sjsing * SUCH DAMAGE. 52578c3ea3Sjsing * 53578c3ea3Sjsing * The licence and distribution terms for any publically available version or 54578c3ea3Sjsing * derivative of this code cannot be changed. i.e. this code cannot simply be 55578c3ea3Sjsing * copied and put under another distribution licence 56578c3ea3Sjsing * [including the GNU Public Licence.] 57578c3ea3Sjsing */ 58578c3ea3Sjsing 59578c3ea3Sjsing #include <endian.h> 60578c3ea3Sjsing 61578c3ea3Sjsing #include <openssl/rc4.h> 62578c3ea3Sjsing 6309b34817Sjsing #include "crypto_arch.h" 6409b34817Sjsing 65578c3ea3Sjsing /* RC4 as implemented from a posting from 66578c3ea3Sjsing * Newsgroups: sci.crypt 67578c3ea3Sjsing * From: sterndark@netcom.com (David Sterndark) 68578c3ea3Sjsing * Subject: RC4 Algorithm revealed. 69578c3ea3Sjsing * Message-ID: <sternCvKL4B.Hyy@netcom.com> 70578c3ea3Sjsing * Date: Wed, 14 Sep 1994 06:35:31 GMT 71578c3ea3Sjsing */ 72578c3ea3Sjsing 73a0d14d73Sjsing #ifdef HAVE_RC4_INTERNAL 74a0d14d73Sjsing void rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, 75a0d14d73Sjsing unsigned char *outdata); 76a0d14d73Sjsing 77a0d14d73Sjsing #else 78a0d14d73Sjsing static void 79a0d14d73Sjsing rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, 80578c3ea3Sjsing unsigned char *outdata) 81578c3ea3Sjsing { 82578c3ea3Sjsing RC4_INT *d; 83578c3ea3Sjsing RC4_INT x, y,tx, ty; 84578c3ea3Sjsing size_t i; 85578c3ea3Sjsing 86578c3ea3Sjsing x = key->x; 87578c3ea3Sjsing y = key->y; 88578c3ea3Sjsing d = key->data; 89578c3ea3Sjsing 90578c3ea3Sjsing #if defined(RC4_CHUNK) 91578c3ea3Sjsing /* 92578c3ea3Sjsing * The original reason for implementing this(*) was the fact that 93578c3ea3Sjsing * pre-21164a Alpha CPUs don't have byte load/store instructions 94578c3ea3Sjsing * and e.g. a byte store has to be done with 64-bit load, shift, 95578c3ea3Sjsing * and, or and finally 64-bit store. Peaking data and operating 96578c3ea3Sjsing * at natural word size made it possible to reduce amount of 97578c3ea3Sjsing * instructions as well as to perform early read-ahead without 98578c3ea3Sjsing * suffering from RAW (read-after-write) hazard. This resulted 99578c3ea3Sjsing * in ~40%(**) performance improvement on 21064 box with gcc. 100578c3ea3Sjsing * But it's not only Alpha users who win here:-) Thanks to the 101578c3ea3Sjsing * early-n-wide read-ahead this implementation also exhibits 102578c3ea3Sjsing * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending 103578c3ea3Sjsing * on sizeof(RC4_INT)). 104578c3ea3Sjsing * 105578c3ea3Sjsing * (*) "this" means code which recognizes the case when input 106578c3ea3Sjsing * and output pointers appear to be aligned at natural CPU 107578c3ea3Sjsing * word boundary 108578c3ea3Sjsing * (**) i.e. according to 'apps/openssl speed rc4' benchmark, 109578c3ea3Sjsing * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... 110578c3ea3Sjsing * 111578c3ea3Sjsing * Caveats. 112578c3ea3Sjsing * 113578c3ea3Sjsing * - RC4_CHUNK="unsigned long long" should be a #1 choice for 114578c3ea3Sjsing * UltraSPARC. Unfortunately gcc generates very slow code 115578c3ea3Sjsing * (2.5-3 times slower than one generated by Sun's WorkShop 116578c3ea3Sjsing * C) and therefore gcc (at least 2.95 and earlier) should 117578c3ea3Sjsing * always be told that RC4_CHUNK="unsigned long". 118578c3ea3Sjsing * 119578c3ea3Sjsing * <appro@fy.chalmers.se> 120578c3ea3Sjsing */ 121578c3ea3Sjsing 122578c3ea3Sjsing # define RC4_STEP ( \ 123578c3ea3Sjsing x=(x+1) &0xff, \ 124578c3ea3Sjsing tx=d[x], \ 125578c3ea3Sjsing y=(tx+y)&0xff, \ 126578c3ea3Sjsing ty=d[y], \ 127578c3ea3Sjsing d[y]=tx, \ 128578c3ea3Sjsing d[x]=ty, \ 129578c3ea3Sjsing (RC4_CHUNK)d[(tx+ty)&0xff]\ 130578c3ea3Sjsing ) 131578c3ea3Sjsing 132578c3ea3Sjsing if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) | 133578c3ea3Sjsing ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0 ) { 134578c3ea3Sjsing RC4_CHUNK ichunk, otp; 135578c3ea3Sjsing 136578c3ea3Sjsing /* 137578c3ea3Sjsing * I reckon we can afford to implement both endian 138578c3ea3Sjsing * cases and to decide which way to take at run-time 139578c3ea3Sjsing * because the machine code appears to be very compact 140578c3ea3Sjsing * and redundant 1-2KB is perfectly tolerable (i.e. 141578c3ea3Sjsing * in case the compiler fails to eliminate it:-). By 142578c3ea3Sjsing * suggestion from Terrel Larson <terr@terralogic.net>. 143578c3ea3Sjsing * 144578c3ea3Sjsing * Special notes. 145578c3ea3Sjsing * 146578c3ea3Sjsing * - compilers (those I've tried) don't seem to have 147578c3ea3Sjsing * problems eliminating either the operators guarded 148578c3ea3Sjsing * by "if (sizeof(RC4_CHUNK)==8)" or the condition 149578c3ea3Sjsing * expressions themselves so I've got 'em to replace 150578c3ea3Sjsing * corresponding #ifdefs from the previous version; 151578c3ea3Sjsing * - I chose to let the redundant switch cases when 152578c3ea3Sjsing * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed 153578c3ea3Sjsing * before); 154578c3ea3Sjsing * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in 155578c3ea3Sjsing * [LB]ESHFT guards against "shift is out of range" 156578c3ea3Sjsing * warnings when sizeof(RC4_CHUNK)!=8 157578c3ea3Sjsing * 158578c3ea3Sjsing * <appro@fy.chalmers.se> 159578c3ea3Sjsing */ 160578c3ea3Sjsing #if BYTE_ORDER == BIG_ENDIAN 161578c3ea3Sjsing # define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) 162578c3ea3Sjsing for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { 163578c3ea3Sjsing ichunk = *(RC4_CHUNK *)indata; 164578c3ea3Sjsing otp = RC4_STEP << BESHFT(0); 165578c3ea3Sjsing otp |= RC4_STEP << BESHFT(1); 166578c3ea3Sjsing otp |= RC4_STEP << BESHFT(2); 167578c3ea3Sjsing otp |= RC4_STEP << BESHFT(3); 168578c3ea3Sjsing if (sizeof(RC4_CHUNK) == 8) { 169578c3ea3Sjsing otp |= RC4_STEP << BESHFT(4); 170578c3ea3Sjsing otp |= RC4_STEP << BESHFT(5); 171578c3ea3Sjsing otp |= RC4_STEP << BESHFT(6); 172578c3ea3Sjsing otp |= RC4_STEP << BESHFT(7); 173578c3ea3Sjsing } 174578c3ea3Sjsing *(RC4_CHUNK *)outdata = otp^ichunk; 175578c3ea3Sjsing indata += sizeof(RC4_CHUNK); 176578c3ea3Sjsing outdata += sizeof(RC4_CHUNK); 177578c3ea3Sjsing } 178578c3ea3Sjsing #else 179578c3ea3Sjsing # define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) 180578c3ea3Sjsing for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { 181578c3ea3Sjsing ichunk = *(RC4_CHUNK *)indata; 182578c3ea3Sjsing otp = RC4_STEP; 183578c3ea3Sjsing otp |= RC4_STEP << 8; 184578c3ea3Sjsing otp |= RC4_STEP << 16; 185578c3ea3Sjsing otp |= RC4_STEP << 24; 186578c3ea3Sjsing if (sizeof(RC4_CHUNK) == 8) { 187578c3ea3Sjsing otp |= RC4_STEP << LESHFT(4); 188578c3ea3Sjsing otp |= RC4_STEP << LESHFT(5); 189578c3ea3Sjsing otp |= RC4_STEP << LESHFT(6); 190578c3ea3Sjsing otp |= RC4_STEP << LESHFT(7); 191578c3ea3Sjsing } 192578c3ea3Sjsing *(RC4_CHUNK *)outdata = otp ^ ichunk; 193578c3ea3Sjsing indata += sizeof(RC4_CHUNK); 194578c3ea3Sjsing outdata += sizeof(RC4_CHUNK); 195578c3ea3Sjsing } 196578c3ea3Sjsing #endif 197578c3ea3Sjsing } 198578c3ea3Sjsing #endif 199*902bfdcaSjsing #define RC4_LOOP(in,out) \ 200578c3ea3Sjsing x=((x+1)&0xff); \ 201578c3ea3Sjsing tx=d[x]; \ 202578c3ea3Sjsing y=(tx+y)&0xff; \ 203578c3ea3Sjsing d[x]=ty=d[y]; \ 204578c3ea3Sjsing d[y]=tx; \ 205578c3ea3Sjsing (out) = d[(tx+ty)&0xff]^ (in); 206578c3ea3Sjsing 207578c3ea3Sjsing i = len >> 3; 208578c3ea3Sjsing if (i) { 209578c3ea3Sjsing for (;;) { 210*902bfdcaSjsing RC4_LOOP(indata[0], outdata[0]); 211*902bfdcaSjsing RC4_LOOP(indata[1], outdata[1]); 212*902bfdcaSjsing RC4_LOOP(indata[2], outdata[2]); 213*902bfdcaSjsing RC4_LOOP(indata[3], outdata[3]); 214*902bfdcaSjsing RC4_LOOP(indata[4], outdata[4]); 215*902bfdcaSjsing RC4_LOOP(indata[5], outdata[5]); 216*902bfdcaSjsing RC4_LOOP(indata[6], outdata[6]); 217*902bfdcaSjsing RC4_LOOP(indata[7], outdata[7]); 218*902bfdcaSjsing 219578c3ea3Sjsing indata += 8; 220578c3ea3Sjsing outdata += 8; 221*902bfdcaSjsing 222578c3ea3Sjsing if (--i == 0) 223578c3ea3Sjsing break; 224578c3ea3Sjsing } 225578c3ea3Sjsing } 226578c3ea3Sjsing i = len&0x07; 227578c3ea3Sjsing if (i) { 228578c3ea3Sjsing for (;;) { 229*902bfdcaSjsing RC4_LOOP(indata[0], outdata[0]); 230578c3ea3Sjsing if (--i == 0) 231578c3ea3Sjsing break; 232*902bfdcaSjsing RC4_LOOP(indata[1], outdata[1]); 233578c3ea3Sjsing if (--i == 0) 234578c3ea3Sjsing break; 235*902bfdcaSjsing RC4_LOOP(indata[2], outdata[2]); 236578c3ea3Sjsing if (--i == 0) 237578c3ea3Sjsing break; 238*902bfdcaSjsing RC4_LOOP(indata[3], outdata[3]); 239578c3ea3Sjsing if (--i == 0) 240578c3ea3Sjsing break; 241*902bfdcaSjsing RC4_LOOP(indata[4], outdata[4]); 242578c3ea3Sjsing if (--i == 0) 243578c3ea3Sjsing break; 244*902bfdcaSjsing RC4_LOOP(indata[5], outdata[5]); 245578c3ea3Sjsing if (--i == 0) 246578c3ea3Sjsing break; 247*902bfdcaSjsing RC4_LOOP(indata[6], outdata[6]); 248578c3ea3Sjsing if (--i == 0) 249578c3ea3Sjsing break; 250578c3ea3Sjsing } 251578c3ea3Sjsing } 252578c3ea3Sjsing key->x = x; 253578c3ea3Sjsing key->y = y; 254578c3ea3Sjsing } 255a0d14d73Sjsing #endif 256578c3ea3Sjsing 257a0d14d73Sjsing #ifdef HAVE_RC4_SET_KEY_INTERNAL 258a0d14d73Sjsing void rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data); 259a0d14d73Sjsing 260a0d14d73Sjsing #else 2613715bfbfSjsing static inline void 262a0d14d73Sjsing rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data) 263578c3ea3Sjsing { 264578c3ea3Sjsing RC4_INT tmp; 265578c3ea3Sjsing int id1, id2; 266578c3ea3Sjsing RC4_INT *d; 267578c3ea3Sjsing unsigned int i; 268578c3ea3Sjsing 269578c3ea3Sjsing d = &(key->data[0]); 270578c3ea3Sjsing key->x = 0; 271578c3ea3Sjsing key->y = 0; 272578c3ea3Sjsing id1 = id2 = 0; 273578c3ea3Sjsing 274578c3ea3Sjsing #define SK_LOOP(d,n) { \ 275578c3ea3Sjsing tmp=d[(n)]; \ 276578c3ea3Sjsing id2 = (data[id1] + tmp + id2) & 0xff; \ 277578c3ea3Sjsing if (++id1 == len) id1=0; \ 278578c3ea3Sjsing d[(n)]=d[id2]; \ 279578c3ea3Sjsing d[id2]=tmp; } 280578c3ea3Sjsing 281578c3ea3Sjsing for (i = 0; i < 256; i++) 282578c3ea3Sjsing d[i] = i; 283578c3ea3Sjsing for (i = 0; i < 256; i += 4) { 284578c3ea3Sjsing SK_LOOP(d, i + 0); 285578c3ea3Sjsing SK_LOOP(d, i + 1); 286578c3ea3Sjsing SK_LOOP(d, i + 2); 287578c3ea3Sjsing SK_LOOP(d, i + 3); 288578c3ea3Sjsing } 289578c3ea3Sjsing } 290a0d14d73Sjsing #endif 291a0d14d73Sjsing 292a0d14d73Sjsing void 293a0d14d73Sjsing RC4(RC4_KEY *key, size_t len, const unsigned char *indata, 294a0d14d73Sjsing unsigned char *outdata) 295a0d14d73Sjsing { 296a0d14d73Sjsing rc4_internal(key, len, indata, outdata); 297a0d14d73Sjsing } 29842e47793Sjoshua LCRYPTO_ALIAS(RC4); 299a0d14d73Sjsing 300a0d14d73Sjsing void 301a0d14d73Sjsing RC4_set_key(RC4_KEY *key, int len, const unsigned char *data) 302a0d14d73Sjsing { 303a0d14d73Sjsing rc4_set_key_internal(key, len, data); 304a0d14d73Sjsing } 30542e47793Sjoshua LCRYPTO_ALIAS(RC4_set_key); 306