/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * Definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
 * same thing and must be changed together.
 *
 * Every variant provides LOAD_LITTLE_32(addr), which yields the 32-bit
 * little-endian value stored at the four bytes starting at addr.  Variants
 * that perform a real 4-byte load also define _MD5_CHECK_ALIGNMENT.
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization:  I don't need to do any weirdness.   On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
 * that below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

/* Native byte order already matches: a plain 4-byte load is sufficient. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * GNU C version: a single lduwa through ASI_PL.  addr must be 4-byte
 * aligned (see the comment above); no check is made here.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa	[%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/* Non-GNU compilers get the routine from outside this header. */
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else	/* !sun4u */
/*
 * Generic version -- will work on little endian, but slowly.
 * Since we do byte operations, we don't have to check for alignment.
 *
 * Each byte is read as uint8_t and widened to uint32_t before it is
 * shifted.  Shifting the int-promoted byte instead would overflow a signed
 * int whenever the top byte is >= 0x80 (undefined behaviour) and would
 * produce a negative int that sign-extends when stored in a wider type.
 * Note that addr is evaluated more than once.
 */
#define	LOAD_LITTLE_32(addr)						\
	((uint32_t)((const uint8_t *)(const void *)(addr))[0] |		\
	((uint32_t)((const uint8_t *)(const void *)(addr))[1] << 8) |	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[2] << 16) |	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[3] << 24))
#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 *
 * LOAD_LITTLE_32_<n>(addr) loads 32-bit word number <n> (one hex digit,
 * i.e. byte offset <n> * 4) relative to addr, using whatever ASI is
 * currently in %asi.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Read back the current contents of the ASI register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd	%%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.  The word offset is
 * converted to a byte offset (<< 2) and used as the lduwa immediate.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa	[%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2));	\
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * Note that these prototypes pass and return the ASI as uint32_t, whereas
 * the GNU C inline versions above use uint8_t.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern uint32_t load_little_32_0(uint32_t *);
extern uint32_t load_little_32_1(uint32_t *);
extern uint32_t load_little_32_2(uint32_t *);
extern uint32_t load_little_32_3(uint32_t *);
extern uint32_t load_little_32_4(uint32_t *);
extern uint32_t load_little_32_5(uint32_t *);
extern uint32_t load_little_32_6(uint32_t *);
extern uint32_t load_little_32_7(uint32_t *);
extern uint32_t load_little_32_8(uint32_t *);
extern uint32_t load_little_32_9(uint32_t *);
extern uint32_t load_little_32_a(uint32_t *);
extern uint32_t load_little_32_b(uint32_t *);
extern uint32_t load_little_32_c(uint32_t *);
extern uint32_t load_little_32_d(uint32_t *);
extern uint32_t load_little_32_e(uint32_t *);
extern uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */