xref: /dflybsd-src/contrib/gcc-8.0/libstdc++-v3/config/cpu/i486/opt/ext/opt_random.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj // Optimizations for random number extensions, x86 version -*- C++ -*-
2*38fd1498Szrj 
3*38fd1498Szrj // Copyright (C) 2012-2018 Free Software Foundation, Inc.
4*38fd1498Szrj //
5*38fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
6*38fd1498Szrj // software; you can redistribute it and/or modify it under the
7*38fd1498Szrj // terms of the GNU General Public License as published by the
8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj // any later version.
10*38fd1498Szrj 
11*38fd1498Szrj // This library is distributed in the hope that it will be useful,
12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj // GNU General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
18*38fd1498Szrj // 3.1, as published by the Free Software Foundation.
19*38fd1498Szrj 
20*38fd1498Szrj // You should have received a copy of the GNU General Public License and
21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*38fd1498Szrj // <http://www.gnu.org/licenses/>.
24*38fd1498Szrj 
25*38fd1498Szrj /** @file ext/opt_random.h
26*38fd1498Szrj  *  This is an internal header file, included by other library headers.
27*38fd1498Szrj  *  Do not attempt to use it directly. @headername{ext/random}
28*38fd1498Szrj  */
29*38fd1498Szrj 
30*38fd1498Szrj #ifndef _EXT_OPT_RANDOM_H
31*38fd1498Szrj #define _EXT_OPT_RANDOM_H 1
32*38fd1498Szrj 
33*38fd1498Szrj #pragma GCC system_header
34*38fd1498Szrj 
35*38fd1498Szrj #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
36*38fd1498Szrj 
37*38fd1498Szrj #ifdef __SSE2__
38*38fd1498Szrj 
_GLIBCXX_VISIBILITY(default)39*38fd1498Szrj namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
40*38fd1498Szrj {
41*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
42*38fd1498Szrj 
namespace {

  /**
   *  @brief  One 128-bit recursion step, implemented with SSE2 intrinsics.
   *
   *  Combines four 128-bit words into a new one:
   *
   *    __a ^ (__a << __sl2 bytes)
   *        ^ ((__b >> __sr1 bits, per 32-bit lane) & mask)
   *        ^ (__c >> __sr2 bytes)
   *        ^ (__d << __sl1 bits, per 32-bit lane)
   *
   *  The byte shifts act on the whole 128-bit value; the bit shifts act
   *  independently on each 32-bit lane.  The mask is built so that
   *  __msk1 applies to the lowest 32-bit lane and __msk4 to the highest.
   *
   *  @tparam __sl1   Per-lane left shift, in bits, applied to @p __d.
   *  @tparam __sl2   Whole-register left shift, in bytes, applied to @p __a.
   *  @tparam __sr1   Per-lane right shift, in bits, applied to @p __b.
   *  @tparam __sr2   Whole-register right shift, in bytes, applied to @p __c.
   *  @tparam __msk1  Mask for lane 0 of the shifted @p __b.
   *  @tparam __msk2  Mask for lane 1 of the shifted @p __b.
   *  @tparam __msk3  Mask for lane 2 of the shifted @p __b.
   *  @tparam __msk4  Mask for lane 3 of the shifted @p __b.
   *  @return The XOR of the five terms listed above.
   */
  template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
           uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
    inline __m128i __sse2_recursion(__m128i __a, __m128i __b,
                                    __m128i __c, __m128i __d)
    {
      // Shifted copies of __b, __c and __d.
      __m128i __y = _mm_srli_epi32(__b, __sr1);
      __m128i __z = _mm_srli_si128(__c, __sr2);
      __m128i __v = _mm_slli_epi32(__d, __sl1);

      // Accumulate the XOR in __z.
      __z = _mm_xor_si128(__z, __a);
      __z = _mm_xor_si128(__z, __v);

      __m128i __x = _mm_slli_si128(__a, __sl2);

      // _mm_set_epi32 takes the highest lane first, hence the reversed
      // argument order: __msk1 ends up in the lowest lane.
      __y = _mm_and_si128(__y, _mm_set_epi32(__msk4, __msk3, __msk2, __msk1));

      __z = _mm_xor_si128(__z, __x);
      return _mm_xor_si128(__z, __y);
    }

}
62*38fd1498Szrj 
63*38fd1498Szrj 
64*38fd1498Szrj #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
65*38fd1498Szrj   template<typename _UIntType, size_t __m,
66*38fd1498Szrj 	   size_t __pos1, size_t __sl1, size_t __sl2,
67*38fd1498Szrj 	   size_t __sr1, size_t __sr2,
68*38fd1498Szrj 	   uint32_t __msk1, uint32_t __msk2,
69*38fd1498Szrj 	   uint32_t __msk3, uint32_t __msk4,
70*38fd1498Szrj 	   uint32_t __parity1, uint32_t __parity2,
71*38fd1498Szrj 	   uint32_t __parity3, uint32_t __parity4>
72*38fd1498Szrj     void simd_fast_mersenne_twister_engine<_UIntType, __m,
73*38fd1498Szrj 					   __pos1, __sl1, __sl2, __sr1, __sr2,
74*38fd1498Szrj 					   __msk1, __msk2, __msk3, __msk4,
75*38fd1498Szrj 					   __parity1, __parity2, __parity3,
76*38fd1498Szrj 					   __parity4>::
77*38fd1498Szrj     _M_gen_rand(void)
78*38fd1498Szrj     {
79*38fd1498Szrj       __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
80*38fd1498Szrj       __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
81*38fd1498Szrj 
82*38fd1498Szrj       size_t __i;
83*38fd1498Szrj       for (__i = 0; __i < _M_nstate - __pos1; ++__i)
84*38fd1498Szrj 	{
85*38fd1498Szrj 	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
86*38fd1498Szrj 					 __msk1, __msk2, __msk3, __msk4>
87*38fd1498Szrj 	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
88*38fd1498Szrj 	  _mm_store_si128(&_M_state[__i], __r);
89*38fd1498Szrj 	  __r1 = __r2;
90*38fd1498Szrj 	  __r2 = __r;
91*38fd1498Szrj 	}
92*38fd1498Szrj       for (; __i < _M_nstate; ++__i)
93*38fd1498Szrj 	{
94*38fd1498Szrj 	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
95*38fd1498Szrj 					 __msk1, __msk2, __msk3, __msk4>
96*38fd1498Szrj 	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
97*38fd1498Szrj 	  _mm_store_si128(&_M_state[__i], __r);
98*38fd1498Szrj 	  __r1 = __r2;
99*38fd1498Szrj 	  __r2 = __r;
100*38fd1498Szrj 	}
101*38fd1498Szrj 
102*38fd1498Szrj       _M_pos = 0;
103*38fd1498Szrj     }
104*38fd1498Szrj 
105*38fd1498Szrj 
106*38fd1498Szrj #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
107*38fd1498Szrj   template<typename _UIntType, size_t __m,
108*38fd1498Szrj 	   size_t __pos1, size_t __sl1, size_t __sl2,
109*38fd1498Szrj 	   size_t __sr1, size_t __sr2,
110*38fd1498Szrj 	   uint32_t __msk1, uint32_t __msk2,
111*38fd1498Szrj 	   uint32_t __msk3, uint32_t __msk4,
112*38fd1498Szrj 	   uint32_t __parity1, uint32_t __parity2,
113*38fd1498Szrj 	   uint32_t __parity3, uint32_t __parity4>
114*38fd1498Szrj     bool
115*38fd1498Szrj     operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
116*38fd1498Szrj 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
117*38fd1498Szrj 	       __msk1, __msk2, __msk3, __msk4,
118*38fd1498Szrj 	       __parity1, __parity2, __parity3, __parity4>& __lhs,
119*38fd1498Szrj 	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
120*38fd1498Szrj 	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
121*38fd1498Szrj 	       __msk1, __msk2, __msk3, __msk4,
122*38fd1498Szrj 	       __parity1, __parity2, __parity3, __parity4>& __rhs)
123*38fd1498Szrj     {
124*38fd1498Szrj       __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
125*38fd1498Szrj       for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
126*38fd1498Szrj 	__res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
127*38fd1498Szrj 						    __rhs._M_state[__i]));
128*38fd1498Szrj       return (_mm_movemask_epi8(__res) == 0xffff
129*38fd1498Szrj 	      && __lhs._M_pos == __rhs._M_pos);
130*38fd1498Szrj     }
131*38fd1498Szrj 
132*38fd1498Szrj 
133*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
134*38fd1498Szrj } // namespace
135*38fd1498Szrj 
136*38fd1498Szrj #endif // __SSE2__
137*38fd1498Szrj 
138*38fd1498Szrj #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
139*38fd1498Szrj 
140*38fd1498Szrj #endif // _EXT_OPT_RANDOM_H
141