1cef8759bSmrg /* A class for building vector constant patterns.
2*4c3eb207Smrg Copyright (C) 2017-2020 Free Software Foundation, Inc.
3cef8759bSmrg
4cef8759bSmrg This file is part of GCC.
5cef8759bSmrg
6cef8759bSmrg GCC is free software; you can redistribute it and/or modify it under
7cef8759bSmrg the terms of the GNU General Public License as published by the Free
8cef8759bSmrg Software Foundation; either version 3, or (at your option) any later
9cef8759bSmrg version.
10cef8759bSmrg
11cef8759bSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12cef8759bSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
13cef8759bSmrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14cef8759bSmrg for more details.
15cef8759bSmrg
16cef8759bSmrg You should have received a copy of the GNU General Public License
17cef8759bSmrg along with GCC; see the file COPYING3. If not see
18cef8759bSmrg <http://www.gnu.org/licenses/>. */
19cef8759bSmrg
20cef8759bSmrg #ifndef GCC_VECTOR_BUILDER_H
21cef8759bSmrg #define GCC_VECTOR_BUILDER_H
22cef8759bSmrg
23cef8759bSmrg /* This class is a wrapper around auto_vec<T> for building vectors of T.
24cef8759bSmrg It aims to encode each vector as npatterns interleaved patterns,
25cef8759bSmrg where each pattern represents a sequence:
26cef8759bSmrg
27cef8759bSmrg { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
28cef8759bSmrg
29cef8759bSmrg The first three elements in each pattern provide enough information
30cef8759bSmrg to derive the other elements. If all patterns have a STEP of zero,
31cef8759bSmrg we only need to encode the first two elements in each pattern.
32cef8759bSmrg If BASE1 is also equal to BASE0 for all patterns, we only need to
33cef8759bSmrg encode the first element in each pattern. The number of encoded
34cef8759bSmrg elements per pattern is given by nelts_per_pattern.
35cef8759bSmrg
36cef8759bSmrg The class can be used in two ways:
37cef8759bSmrg
38cef8759bSmrg 1. It can be used to build a full image of the vector, which is then
39cef8759bSmrg canonicalized by finalize (). In this case npatterns is initially
40cef8759bSmrg the number of elements in the vector and nelts_per_pattern is
41cef8759bSmrg initially 1.
42cef8759bSmrg
43cef8759bSmrg 2. It can be used to build a vector that already has a known encoding.
44cef8759bSmrg This is preferred since it is more efficient and copes with
45cef8759bSmrg variable-length vectors. finalize () then canonicalizes the encoding
46cef8759bSmrg to a simpler form if possible.
47cef8759bSmrg
48*4c3eb207Smrg Shape is the type that specifies the number of elements in the vector
49*4c3eb207Smrg and (where relevant) the type of each element.
50*4c3eb207Smrg
51*4c3eb207Smrg The derived class Derived provides the functionality of this class
52*4c3eb207Smrg for specific Ts. Derived needs to provide the following interface:
53cef8759bSmrg
54cef8759bSmrg bool equal_p (T elt1, T elt2) const;
55cef8759bSmrg
56cef8759bSmrg Return true if elements ELT1 and ELT2 are equal.
57cef8759bSmrg
58cef8759bSmrg bool allow_steps_p () const;
59cef8759bSmrg
60cef8759bSmrg Return true if a stepped representation is OK. We don't allow
61cef8759bSmrg linear series for anything other than integers, to avoid problems
62cef8759bSmrg with rounding.
63cef8759bSmrg
64cef8759bSmrg bool integral_p (T elt) const;
65cef8759bSmrg
66cef8759bSmrg Return true if element ELT can be interpreted as an integer.
67cef8759bSmrg
68cef8759bSmrg StepType step (T elt1, T elt2) const;
69cef8759bSmrg
70cef8759bSmrg Return the value of element ELT2 minus the value of element ELT1,
71cef8759bSmrg given integral_p (ELT1) && integral_p (ELT2). There is no fixed
72cef8759bSmrg choice of StepType.
73cef8759bSmrg
74cef8759bSmrg T apply_step (T base, unsigned int factor, StepType step) const;
75cef8759bSmrg
76cef8759bSmrg Return a vector element with the value BASE + FACTOR * STEP.
77cef8759bSmrg
78cef8759bSmrg bool can_elide_p (T elt) const;
79cef8759bSmrg
80cef8759bSmrg Return true if we can drop element ELT, even if the retained
81cef8759bSmrg elements are different. This is provided for TREE_OVERFLOW
82cef8759bSmrg handling.
83cef8759bSmrg
84cef8759bSmrg void note_representative (T *elt1_ptr, T elt2);
85cef8759bSmrg
86cef8759bSmrg Record that ELT2 is being elided, given that ELT1_PTR points to
87cef8759bSmrg the last encoded element for the containing pattern. This is
88*4c3eb207Smrg again provided for TREE_OVERFLOW handling.
89cef8759bSmrg
90*4c3eb207Smrg static poly_uint64 shape_nelts (Shape shape);
91*4c3eb207Smrg
92*4c3eb207Smrg Return the number of elements in SHAPE.
93*4c3eb207Smrg
94*4c3eb207Smrg The class provides additional functionality for the case in which
95*4c3eb207Smrg T can describe a vector constant as well as an individual element.
96*4c3eb207Smrg This functionality requires:
97*4c3eb207Smrg
98*4c3eb207Smrg static poly_uint64 nelts_of (T x);
99*4c3eb207Smrg
100*4c3eb207Smrg Return the number of elements in vector constant X.
101*4c3eb207Smrg
102*4c3eb207Smrg static unsigned int npatterns_of (T x);
103*4c3eb207Smrg
104*4c3eb207Smrg Return the number of patterns used to encode vector constant X.
105*4c3eb207Smrg
106*4c3eb207Smrg static unsigned int nelts_per_pattern_of (T x);
107*4c3eb207Smrg
108*4c3eb207Smrg Return the number of elements used to encode each pattern
109*4c3eb207Smrg in vector constant X. */
110*4c3eb207Smrg
111*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
112cef8759bSmrg class vector_builder : public auto_vec<T, 32>
113cef8759bSmrg {
114cef8759bSmrg public:
115cef8759bSmrg vector_builder ();
116cef8759bSmrg
full_nelts()117cef8759bSmrg poly_uint64 full_nelts () const { return m_full_nelts; }
npatterns()118cef8759bSmrg unsigned int npatterns () const { return m_npatterns; }
nelts_per_pattern()119cef8759bSmrg unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
120cef8759bSmrg unsigned int encoded_nelts () const;
121cef8759bSmrg bool encoded_full_vector_p () const;
122cef8759bSmrg T elt (unsigned int) const;
123*4c3eb207Smrg unsigned int count_dups (int, int, int) const;
124cef8759bSmrg
125cef8759bSmrg bool operator == (const Derived &) const;
126cef8759bSmrg bool operator != (const Derived &x) const { return !operator == (x); }
127cef8759bSmrg
128*4c3eb207Smrg bool new_unary_operation (Shape, T, bool);
129*4c3eb207Smrg bool new_binary_operation (Shape, T, T, bool);
130*4c3eb207Smrg
131cef8759bSmrg void finalize ();
132cef8759bSmrg
133*4c3eb207Smrg static unsigned int binary_encoded_nelts (T, T);
134*4c3eb207Smrg
135cef8759bSmrg protected:
136cef8759bSmrg void new_vector (poly_uint64, unsigned int, unsigned int);
137cef8759bSmrg void reshape (unsigned int, unsigned int);
138cef8759bSmrg bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
139cef8759bSmrg bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
140cef8759bSmrg bool try_npatterns (unsigned int);
141cef8759bSmrg
142cef8759bSmrg private:
143cef8759bSmrg vector_builder (const vector_builder &);
144cef8759bSmrg vector_builder &operator= (const vector_builder &);
derived()145cef8759bSmrg Derived *derived () { return static_cast<Derived *> (this); }
146cef8759bSmrg const Derived *derived () const;
147cef8759bSmrg
148cef8759bSmrg poly_uint64 m_full_nelts;
149cef8759bSmrg unsigned int m_npatterns;
150cef8759bSmrg unsigned int m_nelts_per_pattern;
151cef8759bSmrg };
152cef8759bSmrg
153*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
154cef8759bSmrg inline const Derived *
derived()155*4c3eb207Smrg vector_builder<T, Shape, Derived>::derived () const
156cef8759bSmrg {
157cef8759bSmrg return static_cast<const Derived *> (this);
158cef8759bSmrg }
159cef8759bSmrg
160*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
161cef8759bSmrg inline
vector_builder()162*4c3eb207Smrg vector_builder<T, Shape, Derived>::vector_builder ()
163cef8759bSmrg : m_full_nelts (0),
164cef8759bSmrg m_npatterns (0),
165cef8759bSmrg m_nelts_per_pattern (0)
166cef8759bSmrg {}
167cef8759bSmrg
168cef8759bSmrg /* Return the number of elements that are explicitly encoded. The vec
169cef8759bSmrg starts with these explicitly-encoded elements and may contain additional
170cef8759bSmrg elided elements. */
171cef8759bSmrg
172*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
173cef8759bSmrg inline unsigned int
encoded_nelts()174*4c3eb207Smrg vector_builder<T, Shape, Derived>::encoded_nelts () const
175cef8759bSmrg {
176cef8759bSmrg return m_npatterns * m_nelts_per_pattern;
177cef8759bSmrg }
178cef8759bSmrg
179cef8759bSmrg /* Return true if every element of the vector is explicitly encoded. */
180cef8759bSmrg
181*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
182cef8759bSmrg inline bool
encoded_full_vector_p()183*4c3eb207Smrg vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
184cef8759bSmrg {
185cef8759bSmrg return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
186cef8759bSmrg }
187cef8759bSmrg
188cef8759bSmrg /* Start building a vector that has FULL_NELTS elements. Initially
189cef8759bSmrg encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */
190cef8759bSmrg
191*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
192cef8759bSmrg void
new_vector(poly_uint64 full_nelts,unsigned int npatterns,unsigned int nelts_per_pattern)193*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
194cef8759bSmrg unsigned int npatterns,
195cef8759bSmrg unsigned int nelts_per_pattern)
196cef8759bSmrg {
197cef8759bSmrg m_full_nelts = full_nelts;
198cef8759bSmrg m_npatterns = npatterns;
199cef8759bSmrg m_nelts_per_pattern = nelts_per_pattern;
200cef8759bSmrg this->reserve (encoded_nelts ());
201cef8759bSmrg this->truncate (0);
202cef8759bSmrg }
203cef8759bSmrg
204cef8759bSmrg /* Return true if this vector and OTHER have the same elements and
205cef8759bSmrg are encoded in the same way. */
206cef8759bSmrg
207*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
208cef8759bSmrg bool
209*4c3eb207Smrg vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
210cef8759bSmrg {
211cef8759bSmrg if (maybe_ne (m_full_nelts, other.m_full_nelts)
212cef8759bSmrg || m_npatterns != other.m_npatterns
213cef8759bSmrg || m_nelts_per_pattern != other.m_nelts_per_pattern)
214cef8759bSmrg return false;
215cef8759bSmrg
216cef8759bSmrg unsigned int nelts = encoded_nelts ();
217cef8759bSmrg for (unsigned int i = 0; i < nelts; ++i)
218cef8759bSmrg if (!derived ()->equal_p ((*this)[i], other[i]))
219cef8759bSmrg return false;
220cef8759bSmrg
221cef8759bSmrg return true;
222cef8759bSmrg }
223cef8759bSmrg
224cef8759bSmrg /* Return the value of vector element I, which might or might not be
225cef8759bSmrg encoded explicitly. */
226cef8759bSmrg
227*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
228cef8759bSmrg T
elt(unsigned int i)229*4c3eb207Smrg vector_builder<T, Shape, Derived>::elt (unsigned int i) const
230cef8759bSmrg {
231cef8759bSmrg /* First handle elements that are already present in the underlying
232cef8759bSmrg vector, regardless of whether they're part of the encoding or not. */
233cef8759bSmrg if (i < this->length ())
234cef8759bSmrg return (*this)[i];
235cef8759bSmrg
236*4c3eb207Smrg /* Extrapolation is only possible if the encoding has been fully
237*4c3eb207Smrg populated. */
238*4c3eb207Smrg gcc_checking_assert (encoded_nelts () <= this->length ());
239*4c3eb207Smrg
240cef8759bSmrg /* Identify the pattern that contains element I and work out the index of
241cef8759bSmrg the last encoded element for that pattern. */
242cef8759bSmrg unsigned int pattern = i % m_npatterns;
243cef8759bSmrg unsigned int count = i / m_npatterns;
244cef8759bSmrg unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
245cef8759bSmrg T final = (*this)[final_i];
246cef8759bSmrg
247cef8759bSmrg /* If there are no steps, the final encoded value is the right one. */
248cef8759bSmrg if (m_nelts_per_pattern <= 2)
249cef8759bSmrg return final;
250cef8759bSmrg
251cef8759bSmrg /* Otherwise work out the value from the last two encoded elements. */
252cef8759bSmrg T prev = (*this)[final_i - m_npatterns];
253cef8759bSmrg return derived ()->apply_step (final, count - 2,
254cef8759bSmrg derived ()->step (prev, final));
255cef8759bSmrg }
256cef8759bSmrg
257*4c3eb207Smrg /* Try to start building a new vector of shape SHAPE that holds the result of
258*4c3eb207Smrg a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the
259*4c3eb207Smrg operation can handle stepped encodings directly, without having to expand
260*4c3eb207Smrg the full sequence.
261*4c3eb207Smrg
262*4c3eb207Smrg Return true if the operation is possible, which it always is when
263*4c3eb207Smrg ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */
264*4c3eb207Smrg
265*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
266*4c3eb207Smrg bool
new_unary_operation(Shape shape,T vec,bool allow_stepped_p)267*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
268*4c3eb207Smrg bool allow_stepped_p)
269*4c3eb207Smrg {
270*4c3eb207Smrg poly_uint64 full_nelts = Derived::shape_nelts (shape);
271*4c3eb207Smrg gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
272*4c3eb207Smrg unsigned int npatterns = Derived::npatterns_of (vec);
273*4c3eb207Smrg unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
274*4c3eb207Smrg if (!allow_stepped_p && nelts_per_pattern > 2)
275*4c3eb207Smrg {
276*4c3eb207Smrg if (!full_nelts.is_constant ())
277*4c3eb207Smrg return false;
278*4c3eb207Smrg npatterns = full_nelts.to_constant ();
279*4c3eb207Smrg nelts_per_pattern = 1;
280*4c3eb207Smrg }
281*4c3eb207Smrg derived ()->new_vector (shape, npatterns, nelts_per_pattern);
282*4c3eb207Smrg return true;
283*4c3eb207Smrg }
284*4c3eb207Smrg
285*4c3eb207Smrg /* Try to start building a new vector of shape SHAPE that holds the result of
286*4c3eb207Smrg a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is
287*4c3eb207Smrg true if the operation can handle stepped encodings directly, without
288*4c3eb207Smrg having to expand the full sequence.
289*4c3eb207Smrg
290*4c3eb207Smrg Return true if the operation is possible. Leave the builder unchanged
291*4c3eb207Smrg otherwise. */
292*4c3eb207Smrg
293*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
294*4c3eb207Smrg bool
new_binary_operation(Shape shape,T vec1,T vec2,bool allow_stepped_p)295*4c3eb207Smrg vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
296*4c3eb207Smrg T vec1, T vec2,
297*4c3eb207Smrg bool allow_stepped_p)
298*4c3eb207Smrg {
299*4c3eb207Smrg poly_uint64 full_nelts = Derived::shape_nelts (shape);
300*4c3eb207Smrg gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
301*4c3eb207Smrg && known_eq (full_nelts, Derived::nelts_of (vec2)));
302*4c3eb207Smrg /* Conceptually we split the patterns in VEC1 and VEC2 until we have
303*4c3eb207Smrg an equal number for both. Each split pattern requires the same
304*4c3eb207Smrg number of elements per pattern as the original. E.g. splitting:
305*4c3eb207Smrg
306*4c3eb207Smrg { 1, 2, 3, ... }
307*4c3eb207Smrg
308*4c3eb207Smrg into two gives:
309*4c3eb207Smrg
310*4c3eb207Smrg { 1, 3, 5, ... }
311*4c3eb207Smrg { 2, 4, 6, ... }
312*4c3eb207Smrg
313*4c3eb207Smrg while splitting:
314*4c3eb207Smrg
315*4c3eb207Smrg { 1, 0, ... }
316*4c3eb207Smrg
317*4c3eb207Smrg into two gives:
318*4c3eb207Smrg
319*4c3eb207Smrg { 1, 0, ... }
320*4c3eb207Smrg { 0, 0, ... }. */
321*4c3eb207Smrg unsigned int npatterns
322*4c3eb207Smrg = least_common_multiple (Derived::npatterns_of (vec1),
323*4c3eb207Smrg Derived::npatterns_of (vec2));
324*4c3eb207Smrg unsigned int nelts_per_pattern
325*4c3eb207Smrg = MAX (Derived::nelts_per_pattern_of (vec1),
326*4c3eb207Smrg Derived::nelts_per_pattern_of (vec2));
327*4c3eb207Smrg if (!allow_stepped_p && nelts_per_pattern > 2)
328*4c3eb207Smrg {
329*4c3eb207Smrg if (!full_nelts.is_constant ())
330*4c3eb207Smrg return false;
331*4c3eb207Smrg npatterns = full_nelts.to_constant ();
332*4c3eb207Smrg nelts_per_pattern = 1;
333*4c3eb207Smrg }
334*4c3eb207Smrg derived ()->new_vector (shape, npatterns, nelts_per_pattern);
335*4c3eb207Smrg return true;
336*4c3eb207Smrg }
337*4c3eb207Smrg
338*4c3eb207Smrg /* Return the number of elements that the caller needs to operate on in
339*4c3eb207Smrg order to handle a binary operation on vector constants VEC1 and VEC2.
340*4c3eb207Smrg This static function is used instead of new_binary_operation if the
341*4c3eb207Smrg result of the operation is not a constant vector. */
342*4c3eb207Smrg
343*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
344*4c3eb207Smrg unsigned int
binary_encoded_nelts(T vec1,T vec2)345*4c3eb207Smrg vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
346*4c3eb207Smrg {
347*4c3eb207Smrg poly_uint64 nelts = Derived::nelts_of (vec1);
348*4c3eb207Smrg gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
349*4c3eb207Smrg /* See new_binary_operation for details. */
350*4c3eb207Smrg unsigned int npatterns
351*4c3eb207Smrg = least_common_multiple (Derived::npatterns_of (vec1),
352*4c3eb207Smrg Derived::npatterns_of (vec2));
353*4c3eb207Smrg unsigned int nelts_per_pattern
354*4c3eb207Smrg = MAX (Derived::nelts_per_pattern_of (vec1),
355*4c3eb207Smrg Derived::nelts_per_pattern_of (vec2));
356*4c3eb207Smrg unsigned HOST_WIDE_INT const_nelts;
357*4c3eb207Smrg if (nelts.is_constant (&const_nelts))
358*4c3eb207Smrg return MIN (npatterns * nelts_per_pattern, const_nelts);
359*4c3eb207Smrg return npatterns * nelts_per_pattern;
360*4c3eb207Smrg }
361*4c3eb207Smrg
362*4c3eb207Smrg /* Return the number of leading duplicate elements in the range
363*4c3eb207Smrg [START:END:STEP]. The value is always at least 1. */
364*4c3eb207Smrg
365*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
366*4c3eb207Smrg unsigned int
count_dups(int start,int end,int step)367*4c3eb207Smrg vector_builder<T, Shape, Derived>::count_dups (int start, int end,
368*4c3eb207Smrg int step) const
369*4c3eb207Smrg {
370*4c3eb207Smrg gcc_assert ((end - start) % step == 0);
371*4c3eb207Smrg
372*4c3eb207Smrg unsigned int ndups = 1;
373*4c3eb207Smrg for (int i = start + step;
374*4c3eb207Smrg i != end && derived ()->equal_p (elt (i), elt (start));
375*4c3eb207Smrg i += step)
376*4c3eb207Smrg ndups++;
377*4c3eb207Smrg return ndups;
378*4c3eb207Smrg }
379*4c3eb207Smrg
380cef8759bSmrg /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
381cef8759bSmrg but without changing the underlying vector. */
382cef8759bSmrg
383*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
384cef8759bSmrg void
reshape(unsigned int npatterns,unsigned int nelts_per_pattern)385*4c3eb207Smrg vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
386cef8759bSmrg unsigned int nelts_per_pattern)
387cef8759bSmrg {
388cef8759bSmrg unsigned int old_encoded_nelts = encoded_nelts ();
389cef8759bSmrg unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
390cef8759bSmrg gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
391cef8759bSmrg unsigned int next = new_encoded_nelts - npatterns;
392cef8759bSmrg for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
393cef8759bSmrg {
394cef8759bSmrg derived ()->note_representative (&(*this)[next], (*this)[i]);
395cef8759bSmrg next += 1;
396cef8759bSmrg if (next == new_encoded_nelts)
397cef8759bSmrg next -= npatterns;
398cef8759bSmrg }
399cef8759bSmrg m_npatterns = npatterns;
400cef8759bSmrg m_nelts_per_pattern = nelts_per_pattern;
401cef8759bSmrg }
402cef8759bSmrg
403cef8759bSmrg /* Return true if elements [START, END) contain a repeating sequence of
404cef8759bSmrg STEP elements. */
405cef8759bSmrg
406*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
407cef8759bSmrg bool
repeating_sequence_p(unsigned int start,unsigned int end,unsigned int step)408*4c3eb207Smrg vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
409cef8759bSmrg unsigned int end,
410cef8759bSmrg unsigned int step)
411cef8759bSmrg {
412cef8759bSmrg for (unsigned int i = start; i < end - step; ++i)
413cef8759bSmrg if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
414cef8759bSmrg return false;
415cef8759bSmrg return true;
416cef8759bSmrg }
417cef8759bSmrg
418cef8759bSmrg /* Return true if elements [START, END) contain STEP interleaved linear
419cef8759bSmrg series. */
420cef8759bSmrg
421*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
422cef8759bSmrg bool
stepped_sequence_p(unsigned int start,unsigned int end,unsigned int step)423*4c3eb207Smrg vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
424cef8759bSmrg unsigned int end,
425cef8759bSmrg unsigned int step)
426cef8759bSmrg {
427cef8759bSmrg if (!derived ()->allow_steps_p ())
428cef8759bSmrg return false;
429cef8759bSmrg
430cef8759bSmrg for (unsigned int i = start + step * 2; i < end; ++i)
431cef8759bSmrg {
432cef8759bSmrg T elt1 = (*this)[i - step * 2];
433cef8759bSmrg T elt2 = (*this)[i - step];
434cef8759bSmrg T elt3 = (*this)[i];
435cef8759bSmrg
436cef8759bSmrg if (!derived ()->integral_p (elt1)
437cef8759bSmrg || !derived ()->integral_p (elt2)
438cef8759bSmrg || !derived ()->integral_p (elt3))
439cef8759bSmrg return false;
440cef8759bSmrg
441cef8759bSmrg if (maybe_ne (derived ()->step (elt1, elt2),
442cef8759bSmrg derived ()->step (elt2, elt3)))
443cef8759bSmrg return false;
444cef8759bSmrg
445cef8759bSmrg if (!derived ()->can_elide_p (elt3))
446cef8759bSmrg return false;
447cef8759bSmrg }
448cef8759bSmrg return true;
449cef8759bSmrg }
450cef8759bSmrg
451cef8759bSmrg /* Try to change the number of encoded patterns to NPATTERNS, returning
452cef8759bSmrg true on success. */
453cef8759bSmrg
454*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
455cef8759bSmrg bool
try_npatterns(unsigned int npatterns)456*4c3eb207Smrg vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
457cef8759bSmrg {
458cef8759bSmrg if (m_nelts_per_pattern == 1)
459cef8759bSmrg {
460cef8759bSmrg /* See whether NPATTERNS is valid with the current 1-element-per-pattern
461cef8759bSmrg encoding. */
462cef8759bSmrg if (repeating_sequence_p (0, encoded_nelts (), npatterns))
463cef8759bSmrg {
464cef8759bSmrg reshape (npatterns, 1);
465cef8759bSmrg return true;
466cef8759bSmrg }
467cef8759bSmrg
468cef8759bSmrg /* We can only increase the number of elements per pattern if all
469cef8759bSmrg elements are still encoded explicitly. */
470cef8759bSmrg if (!encoded_full_vector_p ())
471cef8759bSmrg return false;
472cef8759bSmrg }
473cef8759bSmrg
474cef8759bSmrg if (m_nelts_per_pattern <= 2)
475cef8759bSmrg {
476cef8759bSmrg /* See whether NPATTERNS is valid with a 2-element-per-pattern
477cef8759bSmrg encoding. */
478cef8759bSmrg if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
479cef8759bSmrg {
480cef8759bSmrg reshape (npatterns, 2);
481cef8759bSmrg return true;
482cef8759bSmrg }
483cef8759bSmrg
484cef8759bSmrg /* We can only increase the number of elements per pattern if all
485cef8759bSmrg elements are still encoded explicitly. */
486cef8759bSmrg if (!encoded_full_vector_p ())
487cef8759bSmrg return false;
488cef8759bSmrg }
489cef8759bSmrg
490cef8759bSmrg if (m_nelts_per_pattern <= 3)
491cef8759bSmrg {
492cef8759bSmrg /* See whether we have NPATTERNS interleaved linear series,
493cef8759bSmrg giving a 3-element-per-pattern encoding. */
494cef8759bSmrg if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
495cef8759bSmrg {
496cef8759bSmrg reshape (npatterns, 3);
497cef8759bSmrg return true;
498cef8759bSmrg }
499cef8759bSmrg return false;
500cef8759bSmrg }
501cef8759bSmrg
502cef8759bSmrg gcc_unreachable ();
503cef8759bSmrg }
504cef8759bSmrg
505cef8759bSmrg /* Replace the current encoding with the canonical form. */
506cef8759bSmrg
507*4c3eb207Smrg template<typename T, typename Shape, typename Derived>
508cef8759bSmrg void
finalize()509*4c3eb207Smrg vector_builder<T, Shape, Derived>::finalize ()
510cef8759bSmrg {
511cef8759bSmrg /* The encoding requires the same number of elements to come from each
512cef8759bSmrg pattern. */
513cef8759bSmrg gcc_assert (multiple_p (m_full_nelts, m_npatterns));
514cef8759bSmrg
515cef8759bSmrg /* Allow the caller to build more elements than necessary. For example,
516cef8759bSmrg it's often convenient to build a stepped vector from the natural
517cef8759bSmrg encoding of three elements even if the vector itself only has two. */
518cef8759bSmrg unsigned HOST_WIDE_INT const_full_nelts;
519cef8759bSmrg if (m_full_nelts.is_constant (&const_full_nelts)
520cef8759bSmrg && const_full_nelts <= encoded_nelts ())
521cef8759bSmrg {
522cef8759bSmrg m_npatterns = const_full_nelts;
523cef8759bSmrg m_nelts_per_pattern = 1;
524cef8759bSmrg }
525cef8759bSmrg
526cef8759bSmrg /* Try to whittle down the number of elements per pattern. That is:
527cef8759bSmrg
528cef8759bSmrg 1. If we have stepped patterns whose steps are all 0, reduce the
529cef8759bSmrg number of elements per pattern from 3 to 2.
530cef8759bSmrg
531cef8759bSmrg 2. If we have background fill values that are the same as the
532cef8759bSmrg foreground values, reduce the number of elements per pattern
533cef8759bSmrg from 2 to 1. */
534cef8759bSmrg while (m_nelts_per_pattern > 1
535cef8759bSmrg && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
536cef8759bSmrg encoded_nelts (), m_npatterns))
537cef8759bSmrg /* The last two sequences of M_NPATTERNS elements are equal,
538cef8759bSmrg so remove the last one. */
539cef8759bSmrg reshape (m_npatterns, m_nelts_per_pattern - 1);
540cef8759bSmrg
541cef8759bSmrg if (pow2p_hwi (m_npatterns))
542cef8759bSmrg {
543cef8759bSmrg /* Try to halve the number of patterns while doing so gives a
544cef8759bSmrg valid pattern. This approach is linear in the number of
545cef8759bSmrg elements, whereas searcing from 1 up would be O(n*log(n)).
546cef8759bSmrg
547cef8759bSmrg Each halving step tries to keep the number of elements per pattern
548cef8759bSmrg the same. If that isn't possible, and if all elements are still
549cef8759bSmrg explicitly encoded, the halving step can instead increase the number
550cef8759bSmrg of elements per pattern.
551cef8759bSmrg
552cef8759bSmrg E.g. for:
553cef8759bSmrg
554cef8759bSmrg { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8
555cef8759bSmrg
556cef8759bSmrg we first realize that the second half of the sequence is not
557cef8759bSmrg equal to the first, so we cannot maintain 1 element per pattern
558cef8759bSmrg for npatterns == 4. Instead we halve the number of patterns
559cef8759bSmrg and double the number of elements per pattern, treating this
560cef8759bSmrg as a "foreground" { 0, 2, 3, 4 } against a "background" of
561cef8759bSmrg { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
562cef8759bSmrg
563cef8759bSmrg { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4
564cef8759bSmrg
565cef8759bSmrg Next we realize that this is *not* a foreround of { 0, 2 }
566cef8759bSmrg against a background of { 3, 4 | 3, 4 ... }, so the only
567cef8759bSmrg remaining option for reducing the number of patterns is
568cef8759bSmrg to use a foreground of { 0, 2 } against a stepped background
569cef8759bSmrg of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still
570cef8759bSmrg haven't elided any elements:
571cef8759bSmrg
572cef8759bSmrg { 0, 2 | 3, 4 | 5, 6 } npatterns == 2
573cef8759bSmrg
574cef8759bSmrg This in turn can be reduced to a foreground of { 0 } against a
575cef8759bSmrg stepped background of { 1 | 2 | 3 ... }:
576cef8759bSmrg
577cef8759bSmrg { 0 | 2 | 3 } npatterns == 1
578cef8759bSmrg
579cef8759bSmrg This last step would not have been possible for:
580cef8759bSmrg
581cef8759bSmrg { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */
582cef8759bSmrg while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
583cef8759bSmrg continue;
584cef8759bSmrg
585cef8759bSmrg /* Builders of arbitrary fixed-length vectors can use:
586cef8759bSmrg
587cef8759bSmrg new_vector (x, x, 1)
588cef8759bSmrg
589cef8759bSmrg so that every element is specified explicitly. Handle cases
590cef8759bSmrg that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
591cef8759bSmrg would be for 2-bit elements. We'll have treated them as
592cef8759bSmrg duplicates in the loop above. */
593cef8759bSmrg if (m_nelts_per_pattern == 1
594cef8759bSmrg && m_full_nelts.is_constant (&const_full_nelts)
595cef8759bSmrg && this->length () >= const_full_nelts
596cef8759bSmrg && (m_npatterns & 3) == 0
597cef8759bSmrg && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
598cef8759bSmrg m_npatterns / 4))
599cef8759bSmrg {
600cef8759bSmrg reshape (m_npatterns / 4, 3);
601cef8759bSmrg while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
602cef8759bSmrg continue;
603cef8759bSmrg }
604cef8759bSmrg }
605cef8759bSmrg else
606cef8759bSmrg /* For the non-power-of-2 case, do a simple search up from 1. */
607cef8759bSmrg for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
608cef8759bSmrg if (m_npatterns % i == 0 && try_npatterns (i))
609cef8759bSmrg break;
610cef8759bSmrg }
611cef8759bSmrg
612cef8759bSmrg #endif
613