131
|
1 /* A class for building vector constant patterns.
|
145
|
2 Copyright (C) 2017-2020 Free Software Foundation, Inc.
|
131
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #ifndef GCC_VECTOR_BUILDER_H
|
|
21 #define GCC_VECTOR_BUILDER_H
|
|
22
|
|
23 /* This class is a wrapper around auto_vec<T> for building vectors of T.
|
|
24 It aims to encode each vector as npatterns interleaved patterns,
|
|
25 where each pattern represents a sequence:
|
|
26
|
|
27 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
|
|
28
|
|
29 The first three elements in each pattern provide enough information
|
|
30 to derive the other elements. If all patterns have a STEP of zero,
|
|
31 we only need to encode the first two elements in each pattern.
|
|
32 If BASE1 is also equal to BASE0 for all patterns, we only need to
|
|
33 encode the first element in each pattern. The number of encoded
|
|
34 elements per pattern is given by nelts_per_pattern.
|
|
35
|
|
36 The class can be used in two ways:
|
|
37
|
|
38 1. It can be used to build a full image of the vector, which is then
|
|
39 canonicalized by finalize (). In this case npatterns is initially
|
|
40 the number of elements in the vector and nelts_per_pattern is
|
|
41 initially 1.
|
|
42
|
|
43 2. It can be used to build a vector that already has a known encoding.
|
|
44 This is preferred since it is more efficient and copes with
|
|
45 variable-length vectors. finalize () then canonicalizes the encoding
|
|
46 to a simpler form if possible.
|
|
47
|
145
|
48 Shape is the type that specifies the number of elements in the vector
|
|
49 and (where relevant) the type of each element.
|
|
50
|
|
51 The derived class Derived provides the functionality of this class
|
|
52 for specific Ts. Derived needs to provide the following interface:
|
131
|
53
|
|
54 bool equal_p (T elt1, T elt2) const;
|
|
55
|
|
56 Return true if elements ELT1 and ELT2 are equal.
|
|
57
|
|
58 bool allow_steps_p () const;
|
|
59
|
|
60 Return true if a stepped representation is OK. We don't allow
|
|
61 linear series for anything other than integers, to avoid problems
|
|
62 with rounding.
|
|
63
|
|
64 bool integral_p (T elt) const;
|
|
65
|
|
66 Return true if element ELT can be interpreted as an integer.
|
|
67
|
|
68 StepType step (T elt1, T elt2) const;
|
|
69
|
|
70 Return the value of element ELT2 minus the value of element ELT1,
|
|
71 given integral_p (ELT1) && integral_p (ELT2). There is no fixed
|
|
72 choice of StepType.
|
|
73
|
|
74 T apply_step (T base, unsigned int factor, StepType step) const;
|
|
75
|
|
76 Return a vector element with the value BASE + FACTOR * STEP.
|
|
77
|
|
78 bool can_elide_p (T elt) const;
|
|
79
|
|
80 Return true if we can drop element ELT, even if the retained
|
|
81 elements are different. This is provided for TREE_OVERFLOW
|
|
82 handling.
|
|
83
|
|
84 void note_representative (T *elt1_ptr, T elt2);
|
|
85
|
|
86 Record that ELT2 is being elided, given that ELT1_PTR points to
|
|
87 the last encoded element for the containing pattern. This is
|
145
|
88 again provided for TREE_OVERFLOW handling.
|
|
89
|
|
90 static poly_uint64 shape_nelts (Shape shape);
|
|
91
|
|
92 Return the number of elements in SHAPE.
|
|
93
|
|
94 The class provides additional functionality for the case in which
|
|
95 T can describe a vector constant as well as an individual element.
|
|
96 This functionality requires:
|
|
97
|
|
98 static poly_uint64 nelts_of (T x);
|
131
|
99
|
145
|
100 Return the number of elements in vector constant X.
|
|
101
|
|
102 static unsigned int npatterns_of (T x);
|
|
103
|
|
104 Return the number of patterns used to encode vector constant X.
|
|
105
|
|
106 static unsigned int nelts_per_pattern_of (T x);
|
|
107
|
|
108 Return the number of elements used to encode each pattern
|
|
109 in vector constant X. */
|
|
110
|
|
111 template<typename T, typename Shape, typename Derived>
|
131
|
112 class vector_builder : public auto_vec<T, 32>
|
|
113 {
|
|
114 public:
|
|
115 vector_builder ();
|
|
116
|
|
117 poly_uint64 full_nelts () const { return m_full_nelts; }
|
|
118 unsigned int npatterns () const { return m_npatterns; }
|
|
119 unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
|
|
120 unsigned int encoded_nelts () const;
|
|
121 bool encoded_full_vector_p () const;
|
|
122 T elt (unsigned int) const;
|
145
|
123 unsigned int count_dups (int, int, int) const;
|
131
|
124
|
|
125 bool operator == (const Derived &) const;
|
|
126 bool operator != (const Derived &x) const { return !operator == (x); }
|
|
127
|
145
|
128 bool new_unary_operation (Shape, T, bool);
|
|
129 bool new_binary_operation (Shape, T, T, bool);
|
|
130
|
131
|
131 void finalize ();
|
|
132
|
145
|
133 static unsigned int binary_encoded_nelts (T, T);
|
|
134
|
131
|
135 protected:
|
|
136 void new_vector (poly_uint64, unsigned int, unsigned int);
|
|
137 void reshape (unsigned int, unsigned int);
|
|
138 bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
|
|
139 bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
|
|
140 bool try_npatterns (unsigned int);
|
|
141
|
|
142 private:
|
|
143 vector_builder (const vector_builder &);
|
|
144 vector_builder &operator= (const vector_builder &);
|
|
145 Derived *derived () { return static_cast<Derived *> (this); }
|
|
146 const Derived *derived () const;
|
|
147
|
|
148 poly_uint64 m_full_nelts;
|
|
149 unsigned int m_npatterns;
|
|
150 unsigned int m_nelts_per_pattern;
|
|
151 };
|
|
152
|
145
|
153 template<typename T, typename Shape, typename Derived>
|
131
|
154 inline const Derived *
|
145
|
155 vector_builder<T, Shape, Derived>::derived () const
|
131
|
156 {
|
|
157 return static_cast<const Derived *> (this);
|
|
158 }
|
|
159
|
145
|
160 template<typename T, typename Shape, typename Derived>
|
131
|
161 inline
|
145
|
162 vector_builder<T, Shape, Derived>::vector_builder ()
|
131
|
163 : m_full_nelts (0),
|
|
164 m_npatterns (0),
|
|
165 m_nelts_per_pattern (0)
|
|
166 {}
|
|
167
|
|
168 /* Return the number of elements that are explicitly encoded. The vec
|
|
169 starts with these explicitly-encoded elements and may contain additional
|
|
170 elided elements. */
|
|
171
|
145
|
172 template<typename T, typename Shape, typename Derived>
|
131
|
173 inline unsigned int
|
145
|
174 vector_builder<T, Shape, Derived>::encoded_nelts () const
|
131
|
175 {
|
|
176 return m_npatterns * m_nelts_per_pattern;
|
|
177 }
|
|
178
|
|
179 /* Return true if every element of the vector is explicitly encoded. */
|
|
180
|
145
|
181 template<typename T, typename Shape, typename Derived>
|
131
|
182 inline bool
|
145
|
183 vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
|
131
|
184 {
|
|
185 return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
|
|
186 }
|
|
187
|
|
188 /* Start building a vector that has FULL_NELTS elements. Initially
|
|
189 encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */
|
|
190
|
145
|
191 template<typename T, typename Shape, typename Derived>
|
131
|
192 void
|
145
|
193 vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
|
|
194 unsigned int npatterns,
|
|
195 unsigned int nelts_per_pattern)
|
131
|
196 {
|
|
197 m_full_nelts = full_nelts;
|
|
198 m_npatterns = npatterns;
|
|
199 m_nelts_per_pattern = nelts_per_pattern;
|
|
200 this->reserve (encoded_nelts ());
|
|
201 this->truncate (0);
|
|
202 }
|
|
203
|
|
204 /* Return true if this vector and OTHER have the same elements and
|
|
205 are encoded in the same way. */
|
|
206
|
145
|
207 template<typename T, typename Shape, typename Derived>
|
131
|
208 bool
|
145
|
209 vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
|
131
|
210 {
|
|
211 if (maybe_ne (m_full_nelts, other.m_full_nelts)
|
|
212 || m_npatterns != other.m_npatterns
|
|
213 || m_nelts_per_pattern != other.m_nelts_per_pattern)
|
|
214 return false;
|
|
215
|
|
216 unsigned int nelts = encoded_nelts ();
|
|
217 for (unsigned int i = 0; i < nelts; ++i)
|
|
218 if (!derived ()->equal_p ((*this)[i], other[i]))
|
|
219 return false;
|
|
220
|
|
221 return true;
|
|
222 }
|
|
223
|
|
224 /* Return the value of vector element I, which might or might not be
|
|
225 encoded explicitly. */
|
|
226
|
145
|
227 template<typename T, typename Shape, typename Derived>
|
131
|
228 T
|
145
|
229 vector_builder<T, Shape, Derived>::elt (unsigned int i) const
|
131
|
230 {
|
|
231 /* First handle elements that are already present in the underlying
|
|
232 vector, regardless of whether they're part of the encoding or not. */
|
|
233 if (i < this->length ())
|
|
234 return (*this)[i];
|
|
235
|
145
|
236 /* Extrapolation is only possible if the encoding has been fully
|
|
237 populated. */
|
|
238 gcc_checking_assert (encoded_nelts () <= this->length ());
|
|
239
|
131
|
240 /* Identify the pattern that contains element I and work out the index of
|
|
241 the last encoded element for that pattern. */
|
|
242 unsigned int pattern = i % m_npatterns;
|
|
243 unsigned int count = i / m_npatterns;
|
|
244 unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
|
|
245 T final = (*this)[final_i];
|
|
246
|
|
247 /* If there are no steps, the final encoded value is the right one. */
|
|
248 if (m_nelts_per_pattern <= 2)
|
|
249 return final;
|
|
250
|
|
251 /* Otherwise work out the value from the last two encoded elements. */
|
|
252 T prev = (*this)[final_i - m_npatterns];
|
|
253 return derived ()->apply_step (final, count - 2,
|
|
254 derived ()->step (prev, final));
|
|
255 }
|
|
256
|
145
|
257 /* Try to start building a new vector of shape SHAPE that holds the result of
|
|
258 a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the
|
|
259 operation can handle stepped encodings directly, without having to expand
|
|
260 the full sequence.
|
|
261
|
|
262 Return true if the operation is possible, which it always is when
|
|
263 ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */
|
|
264
|
|
265 template<typename T, typename Shape, typename Derived>
|
|
266 bool
|
|
267 vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
|
|
268 bool allow_stepped_p)
|
|
269 {
|
|
270 poly_uint64 full_nelts = Derived::shape_nelts (shape);
|
|
271 gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
|
|
272 unsigned int npatterns = Derived::npatterns_of (vec);
|
|
273 unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
|
|
274 if (!allow_stepped_p && nelts_per_pattern > 2)
|
|
275 {
|
|
276 if (!full_nelts.is_constant ())
|
|
277 return false;
|
|
278 npatterns = full_nelts.to_constant ();
|
|
279 nelts_per_pattern = 1;
|
|
280 }
|
|
281 derived ()->new_vector (shape, npatterns, nelts_per_pattern);
|
|
282 return true;
|
|
283 }
|
|
284
|
|
285 /* Try to start building a new vector of shape SHAPE that holds the result of
|
|
286 a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is
|
|
287 true if the operation can handle stepped encodings directly, without
|
|
288 having to expand the full sequence.
|
|
289
|
|
290 Return true if the operation is possible. Leave the builder unchanged
|
|
291 otherwise. */
|
|
292
|
|
293 template<typename T, typename Shape, typename Derived>
|
|
294 bool
|
|
295 vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
|
|
296 T vec1, T vec2,
|
|
297 bool allow_stepped_p)
|
|
298 {
|
|
299 poly_uint64 full_nelts = Derived::shape_nelts (shape);
|
|
300 gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
|
|
301 && known_eq (full_nelts, Derived::nelts_of (vec2)));
|
|
302 /* Conceptually we split the patterns in VEC1 and VEC2 until we have
|
|
303 an equal number for both. Each split pattern requires the same
|
|
304 number of elements per pattern as the original. E.g. splitting:
|
|
305
|
|
306 { 1, 2, 3, ... }
|
|
307
|
|
308 into two gives:
|
|
309
|
|
310 { 1, 3, 5, ... }
|
|
311 { 2, 4, 6, ... }
|
|
312
|
|
313 while splitting:
|
|
314
|
|
315 { 1, 0, ... }
|
|
316
|
|
317 into two gives:
|
|
318
|
|
319 { 1, 0, ... }
|
|
320 { 0, 0, ... }. */
|
|
321 unsigned int npatterns
|
|
322 = least_common_multiple (Derived::npatterns_of (vec1),
|
|
323 Derived::npatterns_of (vec2));
|
|
324 unsigned int nelts_per_pattern
|
|
325 = MAX (Derived::nelts_per_pattern_of (vec1),
|
|
326 Derived::nelts_per_pattern_of (vec2));
|
|
327 if (!allow_stepped_p && nelts_per_pattern > 2)
|
|
328 {
|
|
329 if (!full_nelts.is_constant ())
|
|
330 return false;
|
|
331 npatterns = full_nelts.to_constant ();
|
|
332 nelts_per_pattern = 1;
|
|
333 }
|
|
334 derived ()->new_vector (shape, npatterns, nelts_per_pattern);
|
|
335 return true;
|
|
336 }
|
|
337
|
|
338 /* Return the number of elements that the caller needs to operate on in
|
|
339 order to handle a binary operation on vector constants VEC1 and VEC2.
|
|
340 This static function is used instead of new_binary_operation if the
|
|
341 result of the operation is not a constant vector. */
|
|
342
|
|
343 template<typename T, typename Shape, typename Derived>
|
|
344 unsigned int
|
|
345 vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
|
|
346 {
|
|
347 poly_uint64 nelts = Derived::nelts_of (vec1);
|
|
348 gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
|
|
349 /* See new_binary_operation for details. */
|
|
350 unsigned int npatterns
|
|
351 = least_common_multiple (Derived::npatterns_of (vec1),
|
|
352 Derived::npatterns_of (vec2));
|
|
353 unsigned int nelts_per_pattern
|
|
354 = MAX (Derived::nelts_per_pattern_of (vec1),
|
|
355 Derived::nelts_per_pattern_of (vec2));
|
|
356 unsigned HOST_WIDE_INT const_nelts;
|
|
357 if (nelts.is_constant (&const_nelts))
|
|
358 return MIN (npatterns * nelts_per_pattern, const_nelts);
|
|
359 return npatterns * nelts_per_pattern;
|
|
360 }
|
|
361
|
|
362 /* Return the number of leading duplicate elements in the range
|
|
363 [START:END:STEP]. The value is always at least 1. */
|
|
364
|
|
365 template<typename T, typename Shape, typename Derived>
|
|
366 unsigned int
|
|
367 vector_builder<T, Shape, Derived>::count_dups (int start, int end,
|
|
368 int step) const
|
|
369 {
|
|
370 gcc_assert ((end - start) % step == 0);
|
|
371
|
|
372 unsigned int ndups = 1;
|
|
373 for (int i = start + step;
|
|
374 i != end && derived ()->equal_p (elt (i), elt (start));
|
|
375 i += step)
|
|
376 ndups++;
|
|
377 return ndups;
|
|
378 }
|
|
379
|
131
|
380 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
|
|
381 but without changing the underlying vector. */
|
|
382
|
145
|
383 template<typename T, typename Shape, typename Derived>
|
131
|
384 void
|
145
|
385 vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
|
|
386 unsigned int nelts_per_pattern)
|
131
|
387 {
|
|
388 unsigned int old_encoded_nelts = encoded_nelts ();
|
|
389 unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
|
|
390 gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
|
|
391 unsigned int next = new_encoded_nelts - npatterns;
|
|
392 for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
|
|
393 {
|
|
394 derived ()->note_representative (&(*this)[next], (*this)[i]);
|
|
395 next += 1;
|
|
396 if (next == new_encoded_nelts)
|
|
397 next -= npatterns;
|
|
398 }
|
|
399 m_npatterns = npatterns;
|
|
400 m_nelts_per_pattern = nelts_per_pattern;
|
|
401 }
|
|
402
|
|
403 /* Return true if elements [START, END) contain a repeating sequence of
|
|
404 STEP elements. */
|
|
405
|
145
|
406 template<typename T, typename Shape, typename Derived>
|
131
|
407 bool
|
145
|
408 vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
|
|
409 unsigned int end,
|
|
410 unsigned int step)
|
131
|
411 {
|
|
412 for (unsigned int i = start; i < end - step; ++i)
|
|
413 if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
|
|
414 return false;
|
|
415 return true;
|
|
416 }
|
|
417
|
|
418 /* Return true if elements [START, END) contain STEP interleaved linear
|
|
419 series. */
|
|
420
|
145
|
421 template<typename T, typename Shape, typename Derived>
|
131
|
422 bool
|
145
|
423 vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
|
|
424 unsigned int end,
|
|
425 unsigned int step)
|
131
|
426 {
|
|
427 if (!derived ()->allow_steps_p ())
|
|
428 return false;
|
|
429
|
|
430 for (unsigned int i = start + step * 2; i < end; ++i)
|
|
431 {
|
|
432 T elt1 = (*this)[i - step * 2];
|
|
433 T elt2 = (*this)[i - step];
|
|
434 T elt3 = (*this)[i];
|
|
435
|
|
436 if (!derived ()->integral_p (elt1)
|
|
437 || !derived ()->integral_p (elt2)
|
|
438 || !derived ()->integral_p (elt3))
|
|
439 return false;
|
|
440
|
|
441 if (maybe_ne (derived ()->step (elt1, elt2),
|
|
442 derived ()->step (elt2, elt3)))
|
|
443 return false;
|
|
444
|
|
445 if (!derived ()->can_elide_p (elt3))
|
|
446 return false;
|
|
447 }
|
|
448 return true;
|
|
449 }
|
|
450
|
|
451 /* Try to change the number of encoded patterns to NPATTERNS, returning
|
|
452 true on success. */
|
|
453
|
145
|
454 template<typename T, typename Shape, typename Derived>
|
131
|
455 bool
|
145
|
456 vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
|
131
|
457 {
|
|
458 if (m_nelts_per_pattern == 1)
|
|
459 {
|
|
460 /* See whether NPATTERNS is valid with the current 1-element-per-pattern
|
|
461 encoding. */
|
|
462 if (repeating_sequence_p (0, encoded_nelts (), npatterns))
|
|
463 {
|
|
464 reshape (npatterns, 1);
|
|
465 return true;
|
|
466 }
|
|
467
|
|
468 /* We can only increase the number of elements per pattern if all
|
|
469 elements are still encoded explicitly. */
|
|
470 if (!encoded_full_vector_p ())
|
|
471 return false;
|
|
472 }
|
|
473
|
|
474 if (m_nelts_per_pattern <= 2)
|
|
475 {
|
|
476 /* See whether NPATTERNS is valid with a 2-element-per-pattern
|
|
477 encoding. */
|
|
478 if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
|
|
479 {
|
|
480 reshape (npatterns, 2);
|
|
481 return true;
|
|
482 }
|
|
483
|
|
484 /* We can only increase the number of elements per pattern if all
|
|
485 elements are still encoded explicitly. */
|
|
486 if (!encoded_full_vector_p ())
|
|
487 return false;
|
|
488 }
|
|
489
|
|
490 if (m_nelts_per_pattern <= 3)
|
|
491 {
|
|
492 /* See whether we have NPATTERNS interleaved linear series,
|
|
493 giving a 3-element-per-pattern encoding. */
|
|
494 if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
|
|
495 {
|
|
496 reshape (npatterns, 3);
|
|
497 return true;
|
|
498 }
|
|
499 return false;
|
|
500 }
|
|
501
|
|
502 gcc_unreachable ();
|
|
503 }
|
|
504
|
|
505 /* Replace the current encoding with the canonical form. */
|
|
506
|
145
|
507 template<typename T, typename Shape, typename Derived>
|
131
|
508 void
|
145
|
509 vector_builder<T, Shape, Derived>::finalize ()
|
131
|
510 {
|
|
511 /* The encoding requires the same number of elements to come from each
|
|
512 pattern. */
|
|
513 gcc_assert (multiple_p (m_full_nelts, m_npatterns));
|
|
514
|
|
515 /* Allow the caller to build more elements than necessary. For example,
|
|
516 it's often convenient to build a stepped vector from the natural
|
|
517 encoding of three elements even if the vector itself only has two. */
|
|
518 unsigned HOST_WIDE_INT const_full_nelts;
|
|
519 if (m_full_nelts.is_constant (&const_full_nelts)
|
|
520 && const_full_nelts <= encoded_nelts ())
|
|
521 {
|
|
522 m_npatterns = const_full_nelts;
|
|
523 m_nelts_per_pattern = 1;
|
|
524 }
|
|
525
|
|
526 /* Try to whittle down the number of elements per pattern. That is:
|
|
527
|
|
528 1. If we have stepped patterns whose steps are all 0, reduce the
|
|
529 number of elements per pattern from 3 to 2.
|
|
530
|
|
531 2. If we have background fill values that are the same as the
|
|
532 foreground values, reduce the number of elements per pattern
|
|
533 from 2 to 1. */
|
|
534 while (m_nelts_per_pattern > 1
|
|
535 && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
|
|
536 encoded_nelts (), m_npatterns))
|
|
537 /* The last two sequences of M_NPATTERNS elements are equal,
|
|
538 so remove the last one. */
|
|
539 reshape (m_npatterns, m_nelts_per_pattern - 1);
|
|
540
|
|
541 if (pow2p_hwi (m_npatterns))
|
|
542 {
|
|
543 /* Try to halve the number of patterns while doing so gives a
|
|
544 valid pattern. This approach is linear in the number of
|
|
545 elements, whereas searcing from 1 up would be O(n*log(n)).
|
|
546
|
|
547 Each halving step tries to keep the number of elements per pattern
|
|
548 the same. If that isn't possible, and if all elements are still
|
|
549 explicitly encoded, the halving step can instead increase the number
|
|
550 of elements per pattern.
|
|
551
|
|
552 E.g. for:
|
|
553
|
|
554 { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8
|
|
555
|
|
556 we first realize that the second half of the sequence is not
|
|
557 equal to the first, so we cannot maintain 1 element per pattern
|
|
558 for npatterns == 4. Instead we halve the number of patterns
|
|
559 and double the number of elements per pattern, treating this
|
|
560 as a "foreground" { 0, 2, 3, 4 } against a "background" of
|
|
561 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
|
|
562
|
|
563 { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4
|
|
564
|
|
565 Next we realize that this is *not* a foreround of { 0, 2 }
|
|
566 against a background of { 3, 4 | 3, 4 ... }, so the only
|
|
567 remaining option for reducing the number of patterns is
|
|
568 to use a foreground of { 0, 2 } against a stepped background
|
|
569 of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still
|
|
570 haven't elided any elements:
|
|
571
|
|
572 { 0, 2 | 3, 4 | 5, 6 } npatterns == 2
|
|
573
|
|
574 This in turn can be reduced to a foreground of { 0 } against a
|
|
575 stepped background of { 1 | 2 | 3 ... }:
|
|
576
|
|
577 { 0 | 2 | 3 } npatterns == 1
|
|
578
|
|
579 This last step would not have been possible for:
|
|
580
|
|
581 { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */
|
|
582 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
|
583 continue;
|
|
584
|
|
585 /* Builders of arbitrary fixed-length vectors can use:
|
|
586
|
|
587 new_vector (x, x, 1)
|
|
588
|
|
589 so that every element is specified explicitly. Handle cases
|
|
590 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
|
|
591 would be for 2-bit elements. We'll have treated them as
|
|
592 duplicates in the loop above. */
|
|
593 if (m_nelts_per_pattern == 1
|
|
594 && m_full_nelts.is_constant (&const_full_nelts)
|
|
595 && this->length () >= const_full_nelts
|
|
596 && (m_npatterns & 3) == 0
|
|
597 && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
|
|
598 m_npatterns / 4))
|
|
599 {
|
|
600 reshape (m_npatterns / 4, 3);
|
|
601 while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
|
602 continue;
|
|
603 }
|
|
604 }
|
|
605 else
|
|
606 /* For the non-power-of-2 case, do a simple search up from 1. */
|
|
607 for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
|
|
608 if (m_npatterns % i == 0 && try_npatterns (i))
|
|
609 break;
|
|
610 }
|
|
611
|
|
612 #endif
|