xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/vector-builder.h (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1cef8759bSmrg /* A class for building vector constant patterns.
2*4c3eb207Smrg    Copyright (C) 2017-2020 Free Software Foundation, Inc.
3cef8759bSmrg 
4cef8759bSmrg This file is part of GCC.
5cef8759bSmrg 
6cef8759bSmrg GCC is free software; you can redistribute it and/or modify it under
7cef8759bSmrg the terms of the GNU General Public License as published by the Free
8cef8759bSmrg Software Foundation; either version 3, or (at your option) any later
9cef8759bSmrg version.
10cef8759bSmrg 
11cef8759bSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12cef8759bSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
13cef8759bSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14cef8759bSmrg for more details.
15cef8759bSmrg 
16cef8759bSmrg You should have received a copy of the GNU General Public License
17cef8759bSmrg along with GCC; see the file COPYING3.  If not see
18cef8759bSmrg <http://www.gnu.org/licenses/>.  */
19cef8759bSmrg 
20cef8759bSmrg #ifndef GCC_VECTOR_BUILDER_H
21cef8759bSmrg #define GCC_VECTOR_BUILDER_H
22cef8759bSmrg 
23cef8759bSmrg /* This class is a wrapper around auto_vec<T> for building vectors of T.
24cef8759bSmrg    It aims to encode each vector as npatterns interleaved patterns,
25cef8759bSmrg    where each pattern represents a sequence:
26cef8759bSmrg 
27cef8759bSmrg      { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
28cef8759bSmrg 
29cef8759bSmrg    The first three elements in each pattern provide enough information
30cef8759bSmrg    to derive the other elements.  If all patterns have a STEP of zero,
31cef8759bSmrg    we only need to encode the first two elements in each pattern.
32cef8759bSmrg    If BASE1 is also equal to BASE0 for all patterns, we only need to
33cef8759bSmrg    encode the first element in each pattern.  The number of encoded
34cef8759bSmrg    elements per pattern is given by nelts_per_pattern.
35cef8759bSmrg 
36cef8759bSmrg    The class can be used in two ways:
37cef8759bSmrg 
38cef8759bSmrg    1. It can be used to build a full image of the vector, which is then
39cef8759bSmrg       canonicalized by finalize ().  In this case npatterns is initially
40cef8759bSmrg       the number of elements in the vector and nelts_per_pattern is
41cef8759bSmrg       initially 1.
42cef8759bSmrg 
43cef8759bSmrg    2. It can be used to build a vector that already has a known encoding.
44cef8759bSmrg       This is preferred since it is more efficient and copes with
45cef8759bSmrg       variable-length vectors.  finalize () then canonicalizes the encoding
46cef8759bSmrg       to a simpler form if possible.
47cef8759bSmrg 
48*4c3eb207Smrg    Shape is the type that specifies the number of elements in the vector
49*4c3eb207Smrg    and (where relevant) the type of each element.
50*4c3eb207Smrg 
51*4c3eb207Smrg    The derived class Derived provides the functionality of this class
52*4c3eb207Smrg    for specific Ts.  Derived needs to provide the following interface:
53cef8759bSmrg 
54cef8759bSmrg       bool equal_p (T elt1, T elt2) const;
55cef8759bSmrg 
56cef8759bSmrg 	  Return true if elements ELT1 and ELT2 are equal.
57cef8759bSmrg 
58cef8759bSmrg       bool allow_steps_p () const;
59cef8759bSmrg 
60cef8759bSmrg 	  Return true if a stepped representation is OK.  We don't allow
61cef8759bSmrg 	  linear series for anything other than integers, to avoid problems
62cef8759bSmrg 	  with rounding.
63cef8759bSmrg 
64cef8759bSmrg       bool integral_p (T elt) const;
65cef8759bSmrg 
66cef8759bSmrg 	  Return true if element ELT can be interpreted as an integer.
67cef8759bSmrg 
68cef8759bSmrg       StepType step (T elt1, T elt2) const;
69cef8759bSmrg 
70cef8759bSmrg 	  Return the value of element ELT2 minus the value of element ELT1,
71cef8759bSmrg 	  given integral_p (ELT1) && integral_p (ELT2).  There is no fixed
72cef8759bSmrg 	  choice of StepType.
73cef8759bSmrg 
74cef8759bSmrg       T apply_step (T base, unsigned int factor, StepType step) const;
75cef8759bSmrg 
76cef8759bSmrg 	  Return a vector element with the value BASE + FACTOR * STEP.
77cef8759bSmrg 
78cef8759bSmrg       bool can_elide_p (T elt) const;
79cef8759bSmrg 
80cef8759bSmrg 	  Return true if we can drop element ELT, even if the retained
81cef8759bSmrg 	  elements are different.  This is provided for TREE_OVERFLOW
82cef8759bSmrg 	  handling.
83cef8759bSmrg 
84cef8759bSmrg       void note_representative (T *elt1_ptr, T elt2);
85cef8759bSmrg 
86cef8759bSmrg 	  Record that ELT2 is being elided, given that ELT1_PTR points to
87cef8759bSmrg 	  the last encoded element for the containing pattern.  This is
88*4c3eb207Smrg 	  again provided for TREE_OVERFLOW handling.
89cef8759bSmrg 
90*4c3eb207Smrg       static poly_uint64 shape_nelts (Shape shape);
91*4c3eb207Smrg 
92*4c3eb207Smrg 	  Return the number of elements in SHAPE.
93*4c3eb207Smrg 
94*4c3eb207Smrg     The class provides additional functionality for the case in which
95*4c3eb207Smrg     T can describe a vector constant as well as an individual element.
96*4c3eb207Smrg     This functionality requires:
97*4c3eb207Smrg 
98*4c3eb207Smrg       static poly_uint64 nelts_of (T x);
99*4c3eb207Smrg 
100*4c3eb207Smrg 	  Return the number of elements in vector constant X.
101*4c3eb207Smrg 
102*4c3eb207Smrg       static unsigned int npatterns_of (T x);
103*4c3eb207Smrg 
104*4c3eb207Smrg 	  Return the number of patterns used to encode vector constant X.
105*4c3eb207Smrg 
106*4c3eb207Smrg       static unsigned int nelts_per_pattern_of (T x);
107*4c3eb207Smrg 
108*4c3eb207Smrg 	  Return the number of elements used to encode each pattern
109*4c3eb207Smrg 	  in vector constant X.  */
110*4c3eb207Smrg 
111*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
112cef8759bSmrg class vector_builder : public auto_vec<T, 32>
113cef8759bSmrg {
114cef8759bSmrg public:
115cef8759bSmrg   vector_builder ();
116cef8759bSmrg 
full_nelts()117cef8759bSmrg   poly_uint64 full_nelts () const { return m_full_nelts; }
npatterns()118cef8759bSmrg   unsigned int npatterns () const { return m_npatterns; }
nelts_per_pattern()119cef8759bSmrg   unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
120cef8759bSmrg   unsigned int encoded_nelts () const;
121cef8759bSmrg   bool encoded_full_vector_p () const;
122cef8759bSmrg   T elt (unsigned int) const;
123*4c3eb207Smrg   unsigned int count_dups (int, int, int) const;
124cef8759bSmrg 
125cef8759bSmrg   bool operator == (const Derived &) const;
126cef8759bSmrg   bool operator != (const Derived &x) const { return !operator == (x); }
127cef8759bSmrg 
128*4c3eb207Smrg   bool new_unary_operation (Shape, T, bool);
129*4c3eb207Smrg   bool new_binary_operation (Shape, T, T, bool);
130*4c3eb207Smrg 
131cef8759bSmrg   void finalize ();
132cef8759bSmrg 
133*4c3eb207Smrg   static unsigned int binary_encoded_nelts (T, T);
134*4c3eb207Smrg 
135cef8759bSmrg protected:
136cef8759bSmrg   void new_vector (poly_uint64, unsigned int, unsigned int);
137cef8759bSmrg   void reshape (unsigned int, unsigned int);
138cef8759bSmrg   bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
139cef8759bSmrg   bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
140cef8759bSmrg   bool try_npatterns (unsigned int);
141cef8759bSmrg 
142cef8759bSmrg private:
143cef8759bSmrg   vector_builder (const vector_builder &);
144cef8759bSmrg   vector_builder &operator= (const vector_builder &);
derived()145cef8759bSmrg   Derived *derived () { return static_cast<Derived *> (this); }
146cef8759bSmrg   const Derived *derived () const;
147cef8759bSmrg 
148cef8759bSmrg   poly_uint64 m_full_nelts;
149cef8759bSmrg   unsigned int m_npatterns;
150cef8759bSmrg   unsigned int m_nelts_per_pattern;
151cef8759bSmrg };
152cef8759bSmrg 
153*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
154cef8759bSmrg inline const Derived *
derived()155*4c3eb207Smrg vector_builder<T, Shape, Derived>::derived () const
156cef8759bSmrg {
157cef8759bSmrg   return static_cast<const Derived *> (this);
158cef8759bSmrg }
159cef8759bSmrg 
160*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
161cef8759bSmrg inline
vector_builder()162*4c3eb207Smrg vector_builder<T, Shape, Derived>::vector_builder ()
163cef8759bSmrg   : m_full_nelts (0),
164cef8759bSmrg     m_npatterns (0),
165cef8759bSmrg     m_nelts_per_pattern (0)
166cef8759bSmrg {}
167cef8759bSmrg 
168cef8759bSmrg /* Return the number of elements that are explicitly encoded.  The vec
169cef8759bSmrg    starts with these explicitly-encoded elements and may contain additional
170cef8759bSmrg    elided elements.  */
171cef8759bSmrg 
172*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
173cef8759bSmrg inline unsigned int
encoded_nelts()174*4c3eb207Smrg vector_builder<T, Shape, Derived>::encoded_nelts () const
175cef8759bSmrg {
176cef8759bSmrg   return m_npatterns * m_nelts_per_pattern;
177cef8759bSmrg }
178cef8759bSmrg 
179cef8759bSmrg /* Return true if every element of the vector is explicitly encoded.  */
180cef8759bSmrg 
181*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
182cef8759bSmrg inline bool
encoded_full_vector_p()183*4c3eb207Smrg vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
184cef8759bSmrg {
185cef8759bSmrg   return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
186cef8759bSmrg }
187cef8759bSmrg 
188cef8759bSmrg /* Start building a vector that has FULL_NELTS elements.  Initially
189cef8759bSmrg    encode it using NPATTERNS patterns with NELTS_PER_PATTERN each.  */
190cef8759bSmrg 
191*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
192cef8759bSmrg void
new_vector(poly_uint64 full_nelts,unsigned int npatterns,unsigned int nelts_per_pattern)193*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
194cef8759bSmrg 					       unsigned int npatterns,
195cef8759bSmrg 					       unsigned int nelts_per_pattern)
196cef8759bSmrg {
197cef8759bSmrg   m_full_nelts = full_nelts;
198cef8759bSmrg   m_npatterns = npatterns;
199cef8759bSmrg   m_nelts_per_pattern = nelts_per_pattern;
200cef8759bSmrg   this->reserve (encoded_nelts ());
201cef8759bSmrg   this->truncate (0);
202cef8759bSmrg }
203cef8759bSmrg 
204cef8759bSmrg /* Return true if this vector and OTHER have the same elements and
205cef8759bSmrg    are encoded in the same way.  */
206cef8759bSmrg 
207*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
208cef8759bSmrg bool
209*4c3eb207Smrg vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
210cef8759bSmrg {
211cef8759bSmrg   if (maybe_ne (m_full_nelts, other.m_full_nelts)
212cef8759bSmrg       || m_npatterns != other.m_npatterns
213cef8759bSmrg       || m_nelts_per_pattern != other.m_nelts_per_pattern)
214cef8759bSmrg     return false;
215cef8759bSmrg 
216cef8759bSmrg   unsigned int nelts = encoded_nelts ();
217cef8759bSmrg   for (unsigned int i = 0; i < nelts; ++i)
218cef8759bSmrg     if (!derived ()->equal_p ((*this)[i], other[i]))
219cef8759bSmrg       return false;
220cef8759bSmrg 
221cef8759bSmrg   return true;
222cef8759bSmrg }
223cef8759bSmrg 
224cef8759bSmrg /* Return the value of vector element I, which might or might not be
225cef8759bSmrg    encoded explicitly.  */
226cef8759bSmrg 
227*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
228cef8759bSmrg T
elt(unsigned int i)229*4c3eb207Smrg vector_builder<T, Shape, Derived>::elt (unsigned int i) const
230cef8759bSmrg {
231cef8759bSmrg   /* First handle elements that are already present in the underlying
232cef8759bSmrg      vector, regardless of whether they're part of the encoding or not.  */
233cef8759bSmrg   if (i < this->length ())
234cef8759bSmrg     return (*this)[i];
235cef8759bSmrg 
236*4c3eb207Smrg   /* Extrapolation is only possible if the encoding has been fully
237*4c3eb207Smrg      populated.  */
238*4c3eb207Smrg   gcc_checking_assert (encoded_nelts () <= this->length ());
239*4c3eb207Smrg 
240cef8759bSmrg   /* Identify the pattern that contains element I and work out the index of
241cef8759bSmrg      the last encoded element for that pattern.  */
242cef8759bSmrg   unsigned int pattern = i % m_npatterns;
243cef8759bSmrg   unsigned int count = i / m_npatterns;
244cef8759bSmrg   unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
245cef8759bSmrg   T final = (*this)[final_i];
246cef8759bSmrg 
247cef8759bSmrg   /* If there are no steps, the final encoded value is the right one.  */
248cef8759bSmrg   if (m_nelts_per_pattern <= 2)
249cef8759bSmrg     return final;
250cef8759bSmrg 
251cef8759bSmrg   /* Otherwise work out the value from the last two encoded elements.  */
252cef8759bSmrg   T prev = (*this)[final_i - m_npatterns];
253cef8759bSmrg   return derived ()->apply_step (final, count - 2,
254cef8759bSmrg 				 derived ()->step (prev, final));
255cef8759bSmrg }
256cef8759bSmrg 
257*4c3eb207Smrg /* Try to start building a new vector of shape SHAPE that holds the result of
258*4c3eb207Smrg    a unary operation on vector constant VEC.  ALLOW_STEPPED_P is true if the
259*4c3eb207Smrg    operation can handle stepped encodings directly, without having to expand
260*4c3eb207Smrg    the full sequence.
261*4c3eb207Smrg 
262*4c3eb207Smrg    Return true if the operation is possible, which it always is when
263*4c3eb207Smrg    ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
264*4c3eb207Smrg 
265*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
266*4c3eb207Smrg bool
new_unary_operation(Shape shape,T vec,bool allow_stepped_p)267*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
268*4c3eb207Smrg 							bool allow_stepped_p)
269*4c3eb207Smrg {
270*4c3eb207Smrg   poly_uint64 full_nelts = Derived::shape_nelts (shape);
271*4c3eb207Smrg   gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
272*4c3eb207Smrg   unsigned int npatterns = Derived::npatterns_of (vec);
273*4c3eb207Smrg   unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
274*4c3eb207Smrg   if (!allow_stepped_p && nelts_per_pattern > 2)
275*4c3eb207Smrg     {
276*4c3eb207Smrg       if (!full_nelts.is_constant ())
277*4c3eb207Smrg 	return false;
278*4c3eb207Smrg       npatterns = full_nelts.to_constant ();
279*4c3eb207Smrg       nelts_per_pattern = 1;
280*4c3eb207Smrg     }
281*4c3eb207Smrg   derived ()->new_vector (shape, npatterns, nelts_per_pattern);
282*4c3eb207Smrg   return true;
283*4c3eb207Smrg }
284*4c3eb207Smrg 
285*4c3eb207Smrg /* Try to start building a new vector of shape SHAPE that holds the result of
286*4c3eb207Smrg    a binary operation on vector constants VEC1 and VEC2.  ALLOW_STEPPED_P is
287*4c3eb207Smrg    true if the operation can handle stepped encodings directly, without
288*4c3eb207Smrg    having to expand the full sequence.
289*4c3eb207Smrg 
290*4c3eb207Smrg    Return true if the operation is possible.  Leave the builder unchanged
291*4c3eb207Smrg    otherwise.  */
292*4c3eb207Smrg 
293*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
294*4c3eb207Smrg bool
new_binary_operation(Shape shape,T vec1,T vec2,bool allow_stepped_p)295*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
296*4c3eb207Smrg 							 T vec1, T vec2,
297*4c3eb207Smrg 							 bool allow_stepped_p)
298*4c3eb207Smrg {
299*4c3eb207Smrg   poly_uint64 full_nelts = Derived::shape_nelts (shape);
300*4c3eb207Smrg   gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
301*4c3eb207Smrg 	      && known_eq (full_nelts, Derived::nelts_of (vec2)));
302*4c3eb207Smrg   /* Conceptually we split the patterns in VEC1 and VEC2 until we have
303*4c3eb207Smrg      an equal number for both.  Each split pattern requires the same
304*4c3eb207Smrg      number of elements per pattern as the original.  E.g. splitting:
305*4c3eb207Smrg 
306*4c3eb207Smrg        { 1, 2, 3, ... }
307*4c3eb207Smrg 
308*4c3eb207Smrg      into two gives:
309*4c3eb207Smrg 
310*4c3eb207Smrg        { 1, 3, 5, ... }
311*4c3eb207Smrg        { 2, 4, 6, ... }
312*4c3eb207Smrg 
313*4c3eb207Smrg      while splitting:
314*4c3eb207Smrg 
315*4c3eb207Smrg        { 1, 0, ... }
316*4c3eb207Smrg 
317*4c3eb207Smrg      into two gives:
318*4c3eb207Smrg 
319*4c3eb207Smrg        { 1, 0, ... }
320*4c3eb207Smrg        { 0, 0, ... }.  */
321*4c3eb207Smrg   unsigned int npatterns
322*4c3eb207Smrg     = least_common_multiple (Derived::npatterns_of (vec1),
323*4c3eb207Smrg 			     Derived::npatterns_of (vec2));
324*4c3eb207Smrg   unsigned int nelts_per_pattern
325*4c3eb207Smrg     = MAX (Derived::nelts_per_pattern_of (vec1),
326*4c3eb207Smrg 	   Derived::nelts_per_pattern_of (vec2));
327*4c3eb207Smrg   if (!allow_stepped_p && nelts_per_pattern > 2)
328*4c3eb207Smrg     {
329*4c3eb207Smrg       if (!full_nelts.is_constant ())
330*4c3eb207Smrg 	return false;
331*4c3eb207Smrg       npatterns = full_nelts.to_constant ();
332*4c3eb207Smrg       nelts_per_pattern = 1;
333*4c3eb207Smrg     }
334*4c3eb207Smrg   derived ()->new_vector (shape, npatterns, nelts_per_pattern);
335*4c3eb207Smrg   return true;
336*4c3eb207Smrg }
337*4c3eb207Smrg 
338*4c3eb207Smrg /* Return the number of elements that the caller needs to operate on in
339*4c3eb207Smrg    order to handle a binary operation on vector constants VEC1 and VEC2.
340*4c3eb207Smrg    This static function is used instead of new_binary_operation if the
341*4c3eb207Smrg    result of the operation is not a constant vector.  */
342*4c3eb207Smrg 
343*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
344*4c3eb207Smrg unsigned int
binary_encoded_nelts(T vec1,T vec2)345*4c3eb207Smrg vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
346*4c3eb207Smrg {
347*4c3eb207Smrg   poly_uint64 nelts = Derived::nelts_of (vec1);
348*4c3eb207Smrg   gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
349*4c3eb207Smrg   /* See new_binary_operation for details.  */
350*4c3eb207Smrg   unsigned int npatterns
351*4c3eb207Smrg     = least_common_multiple (Derived::npatterns_of (vec1),
352*4c3eb207Smrg 			     Derived::npatterns_of (vec2));
353*4c3eb207Smrg   unsigned int nelts_per_pattern
354*4c3eb207Smrg     = MAX (Derived::nelts_per_pattern_of (vec1),
355*4c3eb207Smrg 	   Derived::nelts_per_pattern_of (vec2));
356*4c3eb207Smrg   unsigned HOST_WIDE_INT const_nelts;
357*4c3eb207Smrg   if (nelts.is_constant (&const_nelts))
358*4c3eb207Smrg     return MIN (npatterns * nelts_per_pattern, const_nelts);
359*4c3eb207Smrg   return npatterns * nelts_per_pattern;
360*4c3eb207Smrg }
361*4c3eb207Smrg 
362*4c3eb207Smrg /* Return the number of leading duplicate elements in the range
363*4c3eb207Smrg    [START:END:STEP].  The value is always at least 1.  */
364*4c3eb207Smrg 
365*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
366*4c3eb207Smrg unsigned int
count_dups(int start,int end,int step)367*4c3eb207Smrg vector_builder<T, Shape, Derived>::count_dups (int start, int end,
368*4c3eb207Smrg 					       int step) const
369*4c3eb207Smrg {
370*4c3eb207Smrg   gcc_assert ((end - start) % step == 0);
371*4c3eb207Smrg 
372*4c3eb207Smrg   unsigned int ndups = 1;
373*4c3eb207Smrg   for (int i = start + step;
374*4c3eb207Smrg        i != end && derived ()->equal_p (elt (i), elt (start));
375*4c3eb207Smrg        i += step)
376*4c3eb207Smrg     ndups++;
377*4c3eb207Smrg   return ndups;
378*4c3eb207Smrg }
379*4c3eb207Smrg 
380cef8759bSmrg /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
381cef8759bSmrg    but without changing the underlying vector.  */
382cef8759bSmrg 
383*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
384cef8759bSmrg void
reshape(unsigned int npatterns,unsigned int nelts_per_pattern)385*4c3eb207Smrg vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
386cef8759bSmrg 					    unsigned int nelts_per_pattern)
387cef8759bSmrg {
388cef8759bSmrg   unsigned int old_encoded_nelts = encoded_nelts ();
389cef8759bSmrg   unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
390cef8759bSmrg   gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
391cef8759bSmrg   unsigned int next = new_encoded_nelts - npatterns;
392cef8759bSmrg   for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
393cef8759bSmrg     {
394cef8759bSmrg       derived ()->note_representative (&(*this)[next], (*this)[i]);
395cef8759bSmrg       next += 1;
396cef8759bSmrg       if (next == new_encoded_nelts)
397cef8759bSmrg 	next -= npatterns;
398cef8759bSmrg     }
399cef8759bSmrg   m_npatterns = npatterns;
400cef8759bSmrg   m_nelts_per_pattern = nelts_per_pattern;
401cef8759bSmrg }
402cef8759bSmrg 
403cef8759bSmrg /* Return true if elements [START, END) contain a repeating sequence of
404cef8759bSmrg    STEP elements.  */
405cef8759bSmrg 
406*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
407cef8759bSmrg bool
repeating_sequence_p(unsigned int start,unsigned int end,unsigned int step)408*4c3eb207Smrg vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
409cef8759bSmrg 							 unsigned int end,
410cef8759bSmrg 							 unsigned int step)
411cef8759bSmrg {
412cef8759bSmrg   for (unsigned int i = start; i < end - step; ++i)
413cef8759bSmrg     if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
414cef8759bSmrg       return false;
415cef8759bSmrg   return true;
416cef8759bSmrg }
417cef8759bSmrg 
418cef8759bSmrg /* Return true if elements [START, END) contain STEP interleaved linear
419cef8759bSmrg    series.  */
420cef8759bSmrg 
421*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
422cef8759bSmrg bool
stepped_sequence_p(unsigned int start,unsigned int end,unsigned int step)423*4c3eb207Smrg vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
424cef8759bSmrg 						       unsigned int end,
425cef8759bSmrg 						       unsigned int step)
426cef8759bSmrg {
427cef8759bSmrg   if (!derived ()->allow_steps_p ())
428cef8759bSmrg     return false;
429cef8759bSmrg 
430cef8759bSmrg   for (unsigned int i = start + step * 2; i < end; ++i)
431cef8759bSmrg     {
432cef8759bSmrg       T elt1 = (*this)[i - step * 2];
433cef8759bSmrg       T elt2 = (*this)[i - step];
434cef8759bSmrg       T elt3 = (*this)[i];
435cef8759bSmrg 
436cef8759bSmrg       if (!derived ()->integral_p (elt1)
437cef8759bSmrg 	  || !derived ()->integral_p (elt2)
438cef8759bSmrg 	  || !derived ()->integral_p (elt3))
439cef8759bSmrg 	return false;
440cef8759bSmrg 
441cef8759bSmrg       if (maybe_ne (derived ()->step (elt1, elt2),
442cef8759bSmrg 		    derived ()->step (elt2, elt3)))
443cef8759bSmrg 	return false;
444cef8759bSmrg 
445cef8759bSmrg       if (!derived ()->can_elide_p (elt3))
446cef8759bSmrg 	return false;
447cef8759bSmrg     }
448cef8759bSmrg   return true;
449cef8759bSmrg }
450cef8759bSmrg 
451cef8759bSmrg /* Try to change the number of encoded patterns to NPATTERNS, returning
452cef8759bSmrg    true on success.  */
453cef8759bSmrg 
454*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
455cef8759bSmrg bool
try_npatterns(unsigned int npatterns)456*4c3eb207Smrg vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
457cef8759bSmrg {
458cef8759bSmrg   if (m_nelts_per_pattern == 1)
459cef8759bSmrg     {
460cef8759bSmrg       /* See whether NPATTERNS is valid with the current 1-element-per-pattern
461cef8759bSmrg 	 encoding.  */
462cef8759bSmrg       if (repeating_sequence_p (0, encoded_nelts (), npatterns))
463cef8759bSmrg 	{
464cef8759bSmrg 	  reshape (npatterns, 1);
465cef8759bSmrg 	  return true;
466cef8759bSmrg 	}
467cef8759bSmrg 
468cef8759bSmrg       /* We can only increase the number of elements per pattern if all
469cef8759bSmrg 	 elements are still encoded explicitly.  */
470cef8759bSmrg       if (!encoded_full_vector_p ())
471cef8759bSmrg 	return false;
472cef8759bSmrg     }
473cef8759bSmrg 
474cef8759bSmrg   if (m_nelts_per_pattern <= 2)
475cef8759bSmrg     {
476cef8759bSmrg       /* See whether NPATTERNS is valid with a 2-element-per-pattern
477cef8759bSmrg 	 encoding.  */
478cef8759bSmrg       if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
479cef8759bSmrg 	{
480cef8759bSmrg 	  reshape (npatterns, 2);
481cef8759bSmrg 	  return true;
482cef8759bSmrg 	}
483cef8759bSmrg 
484cef8759bSmrg       /* We can only increase the number of elements per pattern if all
485cef8759bSmrg 	 elements are still encoded explicitly.  */
486cef8759bSmrg       if (!encoded_full_vector_p ())
487cef8759bSmrg 	return false;
488cef8759bSmrg     }
489cef8759bSmrg 
490cef8759bSmrg   if (m_nelts_per_pattern <= 3)
491cef8759bSmrg     {
492cef8759bSmrg       /* See whether we have NPATTERNS interleaved linear series,
493cef8759bSmrg 	 giving a 3-element-per-pattern encoding.  */
494cef8759bSmrg       if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
495cef8759bSmrg 	{
496cef8759bSmrg 	  reshape (npatterns, 3);
497cef8759bSmrg 	  return true;
498cef8759bSmrg 	}
499cef8759bSmrg       return false;
500cef8759bSmrg     }
501cef8759bSmrg 
502cef8759bSmrg   gcc_unreachable ();
503cef8759bSmrg }
504cef8759bSmrg 
505cef8759bSmrg /* Replace the current encoding with the canonical form.  */
506cef8759bSmrg 
507*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
508cef8759bSmrg void
finalize()509*4c3eb207Smrg vector_builder<T, Shape, Derived>::finalize ()
510cef8759bSmrg {
511cef8759bSmrg   /* The encoding requires the same number of elements to come from each
512cef8759bSmrg      pattern.  */
513cef8759bSmrg   gcc_assert (multiple_p (m_full_nelts, m_npatterns));
514cef8759bSmrg 
515cef8759bSmrg   /* Allow the caller to build more elements than necessary.  For example,
516cef8759bSmrg      it's often convenient to build a stepped vector from the natural
517cef8759bSmrg      encoding of three elements even if the vector itself only has two.  */
518cef8759bSmrg   unsigned HOST_WIDE_INT const_full_nelts;
519cef8759bSmrg   if (m_full_nelts.is_constant (&const_full_nelts)
520cef8759bSmrg       && const_full_nelts <= encoded_nelts ())
521cef8759bSmrg     {
522cef8759bSmrg       m_npatterns = const_full_nelts;
523cef8759bSmrg       m_nelts_per_pattern = 1;
524cef8759bSmrg     }
525cef8759bSmrg 
526cef8759bSmrg   /* Try to whittle down the number of elements per pattern.  That is:
527cef8759bSmrg 
528cef8759bSmrg      1. If we have stepped patterns whose steps are all 0, reduce the
529cef8759bSmrg         number of elements per pattern from 3 to 2.
530cef8759bSmrg 
531cef8759bSmrg      2. If we have background fill values that are the same as the
532cef8759bSmrg         foreground values, reduce the number of elements per pattern
533cef8759bSmrg         from 2 to 1.  */
534cef8759bSmrg   while (m_nelts_per_pattern > 1
535cef8759bSmrg 	 && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
536cef8759bSmrg 				  encoded_nelts (), m_npatterns))
537cef8759bSmrg     /* The last two sequences of M_NPATTERNS elements are equal,
538cef8759bSmrg        so remove the last one.  */
539cef8759bSmrg     reshape (m_npatterns, m_nelts_per_pattern - 1);
540cef8759bSmrg 
541cef8759bSmrg   if (pow2p_hwi (m_npatterns))
542cef8759bSmrg     {
543cef8759bSmrg       /* Try to halve the number of patterns while doing so gives a
544cef8759bSmrg 	 valid pattern.  This approach is linear in the number of
545cef8759bSmrg 	 elements, whereas searcing from 1 up would be O(n*log(n)).
546cef8759bSmrg 
547cef8759bSmrg 	 Each halving step tries to keep the number of elements per pattern
548cef8759bSmrg 	 the same.  If that isn't possible, and if all elements are still
549cef8759bSmrg 	 explicitly encoded, the halving step can instead increase the number
550cef8759bSmrg 	 of elements per pattern.
551cef8759bSmrg 
552cef8759bSmrg 	 E.g. for:
553cef8759bSmrg 
554cef8759bSmrg 	     { 0, 2, 3, 4, 5, 6, 7, 8 }  npatterns == 8  full_nelts == 8
555cef8759bSmrg 
556cef8759bSmrg 	 we first realize that the second half of the sequence is not
557cef8759bSmrg 	 equal to the first, so we cannot maintain 1 element per pattern
558cef8759bSmrg 	 for npatterns == 4.  Instead we halve the number of patterns
559cef8759bSmrg 	 and double the number of elements per pattern, treating this
560cef8759bSmrg 	 as a "foreground" { 0, 2, 3, 4 } against a "background" of
561cef8759bSmrg 	 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
562cef8759bSmrg 
563cef8759bSmrg 	     { 0, 2, 3, 4 | 5, 6, 7, 8 }  npatterns == 4
564cef8759bSmrg 
565cef8759bSmrg 	 Next we realize that this is *not* a foreround of { 0, 2 }
566cef8759bSmrg 	 against a background of { 3, 4 | 3, 4 ... }, so the only
567cef8759bSmrg 	 remaining option for reducing the number of patterns is
568cef8759bSmrg 	 to use a foreground of { 0, 2 } against a stepped background
569cef8759bSmrg 	 of { 1, 2 | 3, 4 | 5, 6 ... }.  This is valid because we still
570cef8759bSmrg 	 haven't elided any elements:
571cef8759bSmrg 
572cef8759bSmrg 	     { 0, 2 | 3, 4 | 5, 6 }  npatterns == 2
573cef8759bSmrg 
574cef8759bSmrg 	 This in turn can be reduced to a foreground of { 0 } against a
575cef8759bSmrg 	 stepped background of { 1 | 2 | 3 ... }:
576cef8759bSmrg 
577cef8759bSmrg 	     { 0 | 2 | 3 }  npatterns == 1
578cef8759bSmrg 
579cef8759bSmrg 	 This last step would not have been possible for:
580cef8759bSmrg 
581cef8759bSmrg 	     { 0, 0 | 3, 4 | 5, 6 }  npatterns == 2.  */
582cef8759bSmrg       while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
583cef8759bSmrg 	continue;
584cef8759bSmrg 
585cef8759bSmrg       /* Builders of arbitrary fixed-length vectors can use:
586cef8759bSmrg 
587cef8759bSmrg 	     new_vector (x, x, 1)
588cef8759bSmrg 
589cef8759bSmrg 	 so that every element is specified explicitly.  Handle cases
590cef8759bSmrg 	 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
591cef8759bSmrg 	 would be for 2-bit elements.  We'll have treated them as
592cef8759bSmrg 	 duplicates in the loop above.  */
593cef8759bSmrg       if (m_nelts_per_pattern == 1
594cef8759bSmrg 	  && m_full_nelts.is_constant (&const_full_nelts)
595cef8759bSmrg 	  && this->length () >= const_full_nelts
596cef8759bSmrg 	  && (m_npatterns & 3) == 0
597cef8759bSmrg 	  && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
598cef8759bSmrg 				 m_npatterns / 4))
599cef8759bSmrg 	{
600cef8759bSmrg 	  reshape (m_npatterns / 4, 3);
601cef8759bSmrg 	  while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
602cef8759bSmrg 	    continue;
603cef8759bSmrg 	}
604cef8759bSmrg     }
605cef8759bSmrg   else
606cef8759bSmrg     /* For the non-power-of-2 case, do a simple search up from 1.  */
607cef8759bSmrg     for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
608cef8759bSmrg       if (m_npatterns % i == 0 && try_npatterns (i))
609cef8759bSmrg 	break;
610cef8759bSmrg }
611cef8759bSmrg 
612cef8759bSmrg #endif
613