131
|
1 /* A class for building vector constant patterns.
|
|
2 Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #ifndef GCC_VECTOR_BUILDER_H
|
|
21 #define GCC_VECTOR_BUILDER_H
|
|
22
|
|
23 /* This class is a wrapper around auto_vec<T> for building vectors of T.
|
|
24 It aims to encode each vector as npatterns interleaved patterns,
|
|
25 where each pattern represents a sequence:
|
|
26
|
|
27 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
|
|
28
|
|
29 The first three elements in each pattern provide enough information
|
|
30 to derive the other elements. If all patterns have a STEP of zero,
|
|
31 we only need to encode the first two elements in each pattern.
|
|
32 If BASE1 is also equal to BASE0 for all patterns, we only need to
|
|
33 encode the first element in each pattern. The number of encoded
|
|
34 elements per pattern is given by nelts_per_pattern.
|
|
35
|
|
36 The class can be used in two ways:
|
|
37
|
|
38 1. It can be used to build a full image of the vector, which is then
|
|
39 canonicalized by finalize (). In this case npatterns is initially
|
|
40 the number of elements in the vector and nelts_per_pattern is
|
|
41 initially 1.
|
|
42
|
|
43 2. It can be used to build a vector that already has a known encoding.
|
|
44 This is preferred since it is more efficient and copes with
|
|
45 variable-length vectors. finalize () then canonicalizes the encoding
|
|
46 to a simpler form if possible.
|
|
47
|
|
48 The derived class Derived provides this functionality for specific Ts.
|
|
49 Derived needs to provide the following interface:
|
|
50
|
|
51 bool equal_p (T elt1, T elt2) const;
|
|
52
|
|
53 Return true if elements ELT1 and ELT2 are equal.
|
|
54
|
|
55 bool allow_steps_p () const;
|
|
56
|
|
57 Return true if a stepped representation is OK. We don't allow
|
|
58 linear series for anything other than integers, to avoid problems
|
|
59 with rounding.
|
|
60
|
|
61 bool integral_p (T elt) const;
|
|
62
|
|
63 Return true if element ELT can be interpreted as an integer.
|
|
64
|
|
65 StepType step (T elt1, T elt2) const;
|
|
66
|
|
67 Return the value of element ELT2 minus the value of element ELT1,
|
|
68 given integral_p (ELT1) && integral_p (ELT2). There is no fixed
|
|
69 choice of StepType.
|
|
70
|
|
71 T apply_step (T base, unsigned int factor, StepType step) const;
|
|
72
|
|
73 Return a vector element with the value BASE + FACTOR * STEP.
|
|
74
|
|
75 bool can_elide_p (T elt) const;
|
|
76
|
|
77 Return true if we can drop element ELT, even if the retained
|
|
78 elements are different. This is provided for TREE_OVERFLOW
|
|
79 handling.
|
|
80
|
|
81 void note_representative (T *elt1_ptr, T elt2);
|
|
82
|
|
83 Record that ELT2 is being elided, given that ELT1_PTR points to
|
|
84 the last encoded element for the containing pattern. This is
|
|
85 again provided for TREE_OVERFLOW handling. */
|
|
86
|
|
87 template<typename T, typename Derived>
|
|
88 class vector_builder : public auto_vec<T, 32>
|
|
89 {
|
|
90 public:
|
|
91 vector_builder ();
|
|
92
|
|
93 poly_uint64 full_nelts () const { return m_full_nelts; }
|
|
94 unsigned int npatterns () const { return m_npatterns; }
|
|
95 unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
|
|
96 unsigned int encoded_nelts () const;
|
|
97 bool encoded_full_vector_p () const;
|
|
98 T elt (unsigned int) const;
|
|
99
|
|
100 bool operator == (const Derived &) const;
|
|
101 bool operator != (const Derived &x) const { return !operator == (x); }
|
|
102
|
|
103 void finalize ();
|
|
104
|
|
105 protected:
|
|
106 void new_vector (poly_uint64, unsigned int, unsigned int);
|
|
107 void reshape (unsigned int, unsigned int);
|
|
108 bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
|
|
109 bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
|
|
110 bool try_npatterns (unsigned int);
|
|
111
|
|
112 private:
|
|
113 vector_builder (const vector_builder &);
|
|
114 vector_builder &operator= (const vector_builder &);
|
|
115 Derived *derived () { return static_cast<Derived *> (this); }
|
|
116 const Derived *derived () const;
|
|
117
|
|
118 poly_uint64 m_full_nelts;
|
|
119 unsigned int m_npatterns;
|
|
120 unsigned int m_nelts_per_pattern;
|
|
121 };
|
|
122
|
|
123 template<typename T, typename Derived>
|
|
124 inline const Derived *
|
|
125 vector_builder<T, Derived>::derived () const
|
|
126 {
|
|
127 return static_cast<const Derived *> (this);
|
|
128 }
|
|
129
|
|
130 template<typename T, typename Derived>
|
|
131 inline
|
|
132 vector_builder<T, Derived>::vector_builder ()
|
|
133 : m_full_nelts (0),
|
|
134 m_npatterns (0),
|
|
135 m_nelts_per_pattern (0)
|
|
136 {}
|
|
137
|
|
138 /* Return the number of elements that are explicitly encoded. The vec
|
|
139 starts with these explicitly-encoded elements and may contain additional
|
|
140 elided elements. */
|
|
141
|
|
142 template<typename T, typename Derived>
|
|
143 inline unsigned int
|
|
144 vector_builder<T, Derived>::encoded_nelts () const
|
|
145 {
|
|
146 return m_npatterns * m_nelts_per_pattern;
|
|
147 }
|
|
148
|
|
149 /* Return true if every element of the vector is explicitly encoded. */
|
|
150
|
|
151 template<typename T, typename Derived>
|
|
152 inline bool
|
|
153 vector_builder<T, Derived>::encoded_full_vector_p () const
|
|
154 {
|
|
155 return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
|
|
156 }
|
|
157
|
|
158 /* Start building a vector that has FULL_NELTS elements. Initially
|
|
159 encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */
|
|
160
|
|
161 template<typename T, typename Derived>
|
|
162 void
|
|
163 vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts,
|
|
164 unsigned int npatterns,
|
|
165 unsigned int nelts_per_pattern)
|
|
166 {
|
|
167 m_full_nelts = full_nelts;
|
|
168 m_npatterns = npatterns;
|
|
169 m_nelts_per_pattern = nelts_per_pattern;
|
|
170 this->reserve (encoded_nelts ());
|
|
171 this->truncate (0);
|
|
172 }
|
|
173
|
|
174 /* Return true if this vector and OTHER have the same elements and
|
|
175 are encoded in the same way. */
|
|
176
|
|
177 template<typename T, typename Derived>
|
|
178 bool
|
|
179 vector_builder<T, Derived>::operator == (const Derived &other) const
|
|
180 {
|
|
181 if (maybe_ne (m_full_nelts, other.m_full_nelts)
|
|
182 || m_npatterns != other.m_npatterns
|
|
183 || m_nelts_per_pattern != other.m_nelts_per_pattern)
|
|
184 return false;
|
|
185
|
|
186 unsigned int nelts = encoded_nelts ();
|
|
187 for (unsigned int i = 0; i < nelts; ++i)
|
|
188 if (!derived ()->equal_p ((*this)[i], other[i]))
|
|
189 return false;
|
|
190
|
|
191 return true;
|
|
192 }
|
|
193
|
|
194 /* Return the value of vector element I, which might or might not be
|
|
195 encoded explicitly. */
|
|
196
|
|
197 template<typename T, typename Derived>
|
|
198 T
|
|
199 vector_builder<T, Derived>::elt (unsigned int i) const
|
|
200 {
|
|
201 /* This only makes sense if the encoding has been fully populated. */
|
|
202 gcc_checking_assert (encoded_nelts () <= this->length ());
|
|
203
|
|
204 /* First handle elements that are already present in the underlying
|
|
205 vector, regardless of whether they're part of the encoding or not. */
|
|
206 if (i < this->length ())
|
|
207 return (*this)[i];
|
|
208
|
|
209 /* Identify the pattern that contains element I and work out the index of
|
|
210 the last encoded element for that pattern. */
|
|
211 unsigned int pattern = i % m_npatterns;
|
|
212 unsigned int count = i / m_npatterns;
|
|
213 unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
|
|
214 T final = (*this)[final_i];
|
|
215
|
|
216 /* If there are no steps, the final encoded value is the right one. */
|
|
217 if (m_nelts_per_pattern <= 2)
|
|
218 return final;
|
|
219
|
|
220 /* Otherwise work out the value from the last two encoded elements. */
|
|
221 T prev = (*this)[final_i - m_npatterns];
|
|
222 return derived ()->apply_step (final, count - 2,
|
|
223 derived ()->step (prev, final));
|
|
224 }
|
|
225
|
|
226 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
|
|
227 but without changing the underlying vector. */
|
|
228
|
|
229 template<typename T, typename Derived>
|
|
230 void
|
|
231 vector_builder<T, Derived>::reshape (unsigned int npatterns,
|
|
232 unsigned int nelts_per_pattern)
|
|
233 {
|
|
234 unsigned int old_encoded_nelts = encoded_nelts ();
|
|
235 unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
|
|
236 gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
|
|
237 unsigned int next = new_encoded_nelts - npatterns;
|
|
238 for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
|
|
239 {
|
|
240 derived ()->note_representative (&(*this)[next], (*this)[i]);
|
|
241 next += 1;
|
|
242 if (next == new_encoded_nelts)
|
|
243 next -= npatterns;
|
|
244 }
|
|
245 m_npatterns = npatterns;
|
|
246 m_nelts_per_pattern = nelts_per_pattern;
|
|
247 }
|
|
248
|
|
249 /* Return true if elements [START, END) contain a repeating sequence of
|
|
250 STEP elements. */
|
|
251
|
|
252 template<typename T, typename Derived>
|
|
253 bool
|
|
254 vector_builder<T, Derived>::repeating_sequence_p (unsigned int start,
|
|
255 unsigned int end,
|
|
256 unsigned int step)
|
|
257 {
|
|
258 for (unsigned int i = start; i < end - step; ++i)
|
|
259 if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
|
|
260 return false;
|
|
261 return true;
|
|
262 }
|
|
263
|
|
264 /* Return true if elements [START, END) contain STEP interleaved linear
|
|
265 series. */
|
|
266
|
|
267 template<typename T, typename Derived>
|
|
268 bool
|
|
269 vector_builder<T, Derived>::stepped_sequence_p (unsigned int start,
|
|
270 unsigned int end,
|
|
271 unsigned int step)
|
|
272 {
|
|
273 if (!derived ()->allow_steps_p ())
|
|
274 return false;
|
|
275
|
|
276 for (unsigned int i = start + step * 2; i < end; ++i)
|
|
277 {
|
|
278 T elt1 = (*this)[i - step * 2];
|
|
279 T elt2 = (*this)[i - step];
|
|
280 T elt3 = (*this)[i];
|
|
281
|
|
282 if (!derived ()->integral_p (elt1)
|
|
283 || !derived ()->integral_p (elt2)
|
|
284 || !derived ()->integral_p (elt3))
|
|
285 return false;
|
|
286
|
|
287 if (maybe_ne (derived ()->step (elt1, elt2),
|
|
288 derived ()->step (elt2, elt3)))
|
|
289 return false;
|
|
290
|
|
291 if (!derived ()->can_elide_p (elt3))
|
|
292 return false;
|
|
293 }
|
|
294 return true;
|
|
295 }
|
|
296
|
|
297 /* Try to change the number of encoded patterns to NPATTERNS, returning
|
|
298 true on success. */
|
|
299
|
|
300 template<typename T, typename Derived>
|
|
301 bool
|
|
302 vector_builder<T, Derived>::try_npatterns (unsigned int npatterns)
|
|
303 {
|
|
304 if (m_nelts_per_pattern == 1)
|
|
305 {
|
|
306 /* See whether NPATTERNS is valid with the current 1-element-per-pattern
|
|
307 encoding. */
|
|
308 if (repeating_sequence_p (0, encoded_nelts (), npatterns))
|
|
309 {
|
|
310 reshape (npatterns, 1);
|
|
311 return true;
|
|
312 }
|
|
313
|
|
314 /* We can only increase the number of elements per pattern if all
|
|
315 elements are still encoded explicitly. */
|
|
316 if (!encoded_full_vector_p ())
|
|
317 return false;
|
|
318 }
|
|
319
|
|
320 if (m_nelts_per_pattern <= 2)
|
|
321 {
|
|
322 /* See whether NPATTERNS is valid with a 2-element-per-pattern
|
|
323 encoding. */
|
|
324 if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
|
|
325 {
|
|
326 reshape (npatterns, 2);
|
|
327 return true;
|
|
328 }
|
|
329
|
|
330 /* We can only increase the number of elements per pattern if all
|
|
331 elements are still encoded explicitly. */
|
|
332 if (!encoded_full_vector_p ())
|
|
333 return false;
|
|
334 }
|
|
335
|
|
336 if (m_nelts_per_pattern <= 3)
|
|
337 {
|
|
338 /* See whether we have NPATTERNS interleaved linear series,
|
|
339 giving a 3-element-per-pattern encoding. */
|
|
340 if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
|
|
341 {
|
|
342 reshape (npatterns, 3);
|
|
343 return true;
|
|
344 }
|
|
345 return false;
|
|
346 }
|
|
347
|
|
348 gcc_unreachable ();
|
|
349 }
|
|
350
|
|
351 /* Replace the current encoding with the canonical form. */
|
|
352
|
|
353 template<typename T, typename Derived>
|
|
354 void
|
|
355 vector_builder<T, Derived>::finalize ()
|
|
356 {
|
|
357 /* The encoding requires the same number of elements to come from each
|
|
358 pattern. */
|
|
359 gcc_assert (multiple_p (m_full_nelts, m_npatterns));
|
|
360
|
|
361 /* Allow the caller to build more elements than necessary. For example,
|
|
362 it's often convenient to build a stepped vector from the natural
|
|
363 encoding of three elements even if the vector itself only has two. */
|
|
364 unsigned HOST_WIDE_INT const_full_nelts;
|
|
365 if (m_full_nelts.is_constant (&const_full_nelts)
|
|
366 && const_full_nelts <= encoded_nelts ())
|
|
367 {
|
|
368 m_npatterns = const_full_nelts;
|
|
369 m_nelts_per_pattern = 1;
|
|
370 }
|
|
371
|
|
372 /* Try to whittle down the number of elements per pattern. That is:
|
|
373
|
|
374 1. If we have stepped patterns whose steps are all 0, reduce the
|
|
375 number of elements per pattern from 3 to 2.
|
|
376
|
|
377 2. If we have background fill values that are the same as the
|
|
378 foreground values, reduce the number of elements per pattern
|
|
379 from 2 to 1. */
|
|
380 while (m_nelts_per_pattern > 1
|
|
381 && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
|
|
382 encoded_nelts (), m_npatterns))
|
|
383 /* The last two sequences of M_NPATTERNS elements are equal,
|
|
384 so remove the last one. */
|
|
385 reshape (m_npatterns, m_nelts_per_pattern - 1);
|
|
386
|
|
387 if (pow2p_hwi (m_npatterns))
|
|
388 {
|
|
389 /* Try to halve the number of patterns while doing so gives a
|
|
390 valid pattern. This approach is linear in the number of
|
|
391 elements, whereas searcing from 1 up would be O(n*log(n)).
|
|
392
|
|
393 Each halving step tries to keep the number of elements per pattern
|
|
394 the same. If that isn't possible, and if all elements are still
|
|
395 explicitly encoded, the halving step can instead increase the number
|
|
396 of elements per pattern.
|
|
397
|
|
398 E.g. for:
|
|
399
|
|
400 { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8
|
|
401
|
|
402 we first realize that the second half of the sequence is not
|
|
403 equal to the first, so we cannot maintain 1 element per pattern
|
|
404 for npatterns == 4. Instead we halve the number of patterns
|
|
405 and double the number of elements per pattern, treating this
|
|
406 as a "foreground" { 0, 2, 3, 4 } against a "background" of
|
|
407 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
|
|
408
|
|
409 { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4
|
|
410
|
|
411 Next we realize that this is *not* a foreround of { 0, 2 }
|
|
412 against a background of { 3, 4 | 3, 4 ... }, so the only
|
|
413 remaining option for reducing the number of patterns is
|
|
414 to use a foreground of { 0, 2 } against a stepped background
|
|
415 of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still
|
|
416 haven't elided any elements:
|
|
417
|
|
418 { 0, 2 | 3, 4 | 5, 6 } npatterns == 2
|
|
419
|
|
420 This in turn can be reduced to a foreground of { 0 } against a
|
|
421 stepped background of { 1 | 2 | 3 ... }:
|
|
422
|
|
423 { 0 | 2 | 3 } npatterns == 1
|
|
424
|
|
425 This last step would not have been possible for:
|
|
426
|
|
427 { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */
|
|
428 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
|
429 continue;
|
|
430
|
|
431 /* Builders of arbitrary fixed-length vectors can use:
|
|
432
|
|
433 new_vector (x, x, 1)
|
|
434
|
|
435 so that every element is specified explicitly. Handle cases
|
|
436 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
|
|
437 would be for 2-bit elements. We'll have treated them as
|
|
438 duplicates in the loop above. */
|
|
439 if (m_nelts_per_pattern == 1
|
|
440 && m_full_nelts.is_constant (&const_full_nelts)
|
|
441 && this->length () >= const_full_nelts
|
|
442 && (m_npatterns & 3) == 0
|
|
443 && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
|
|
444 m_npatterns / 4))
|
|
445 {
|
|
446 reshape (m_npatterns / 4, 3);
|
|
447 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
|
448 continue;
|
|
449 }
|
|
450 }
|
|
451 else
|
|
452 /* For the non-power-of-2 case, do a simple search up from 1. */
|
|
453 for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
|
|
454 if (m_npatterns % i == 0 && try_npatterns (i))
|
|
455 break;
|
|
456 }
|
|
457
|
|
458 #endif
|