CbC_gcc: gcc/tree-sra.c @ 0:a06113de4d67 ("first commit")
author:   kent <kent@cr.ie.u-ryukyu.ac.jp>
date:     Fri, 17 Jul 2009 14:47:48 +0900
children: 77e2b8dfacca
/* Scalar Replacement of Aggregates (SRA) converts some structure
   references into scalar references, exposing them to the scalar
   optimizers.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Diego Novillo <dnovillo@redhat.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"

/* These RTL headers are needed for basic-block.h.  */
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "langhooks.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
#include "flags.h"
#include "bitmap.h"
#include "obstack.h"
#include "target.h"
/* expr.h is needed for MOVE_RATIO.  */
#include "expr.h"
#include "params.h"

/* The objective of this pass is to replace a non-addressable aggregate with a
   set of independent variables.  Most of the time, all of these variables
   will be scalars.  But a secondary objective is to break up larger
   aggregates into smaller aggregates.  In the process we may find that some
   bits of the larger aggregate can be deleted as unreferenced.

   This substitution is done globally.  More localized substitutions would
   be the purview of a load-store motion pass.

   The optimization proceeds in phases:

     (1) Identify variables that have types that are candidates for
         decomposition.

     (2) Scan the function looking for the ways these variables are used.
         In particular we're interested in the number of times a variable
         (or member) is needed as a complete unit, and the number of times
         a variable (or member) is copied.

     (3) Based on the usage profile, instantiate substitution variables.

     (4) Scan the function making replacements.
*/
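
/* An illustrative sketch (editorial, not part of the original file):
   given

       struct point { int x; int y; };
       struct point p;
       p.x = 3;
       p.y = 4;
       return p.x + p.y;

   this pass may replace P with two independent scalars, conventionally
   named "p$x" and "p$y" (see build_element_name below), so the struct
   itself need never be materialized:

       p$x = 3;
       p$y = 4;
       return p$x + p$y;

   Whether the replacement actually happens depends on the use/copy
   counts gathered in phase 2.  */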

/* True if this is the "early" pass, before inlining.  */
static bool early_sra;

/* The set of todo flags to return from tree_sra.  */
static unsigned int todoflags;

/* The set of aggregate variables that are candidates for scalarization.  */
static bitmap sra_candidates;

/* Set of scalarizable PARM_DECLs that need copy-in operations at the
   beginning of the function.  */
static bitmap needs_copy_in;

/* Sets of bit pairs that cache type decomposition and instantiation.  */
static bitmap sra_type_decomp_cache;
static bitmap sra_type_inst_cache;

/* One of these structures is created for each candidate aggregate and
   each (accessed) member or group of members of such an aggregate.  */
struct sra_elt
{
  /* A tree of the elements.  Used when we want to traverse everything.  */
  struct sra_elt *parent;
  struct sra_elt *groups;
  struct sra_elt *children;
  struct sra_elt *sibling;

  /* If this element is a root, then this is the VAR_DECL.  If this is
     a sub-element, this is some token used to identify the reference.
     In the case of COMPONENT_REF, this is the FIELD_DECL.  In the case
     of an ARRAY_REF, this is the (constant) index.  In the case of an
     ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR.  In the case
     of a complex number, this is a zero or one.  */
  tree element;

  /* The type of the element.  */
  tree type;

  /* A VAR_DECL, for any sub-element we've decided to replace.  */
  tree replacement;

  /* The number of times the element is referenced as a whole.  I.e.
     given "a.b.c", this would be incremented for C, but not for A or B.  */
  unsigned int n_uses;

  /* The number of times the element is copied to or from another
     scalarizable element.  */
  unsigned int n_copies;

  /* True if TYPE is scalar.  */
  bool is_scalar;

  /* True if this element is a group of members of its parent.  */
  bool is_group;

  /* True if we saw something about this element that prevents scalarization,
     such as non-constant indexing.  */
  bool cannot_scalarize;

  /* True if we've decided that structure-to-structure assignment
     should happen via memcpy and not per-element.  */
  bool use_block_copy;

  /* True if everything under this element has been marked TREE_NO_WARNING.  */
  bool all_no_warning;

  /* A flag for use with/after random access traversals.  */
  bool visited;

  /* True if there is BIT_FIELD_REF on the lhs with a vector.  */
  bool is_vector_lhs;

  /* 1 if the element is a field that is part of a block, 2 if the field
     is the block itself, 0 if it's neither.  */
  char in_bitfld_block;
};

#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)

#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT)		\
  for ((CHILD) = (ELT)->is_group			\
		 ? next_child_for_group (NULL, (ELT))	\
		 : (ELT)->children;			\
       (CHILD);						\
       (CHILD) = (ELT)->is_group			\
		 ? next_child_for_group ((CHILD), (ELT))\
		 : (CHILD)->sibling)

/* Helper function for above macro.  Return next child in group.  */
static struct sra_elt *
next_child_for_group (struct sra_elt *child, struct sra_elt *group)
{
  gcc_assert (group->is_group);

  /* Find the next child in the parent.  */
  if (child)
    child = child->sibling;
  else
    child = group->parent->children;

  /* Skip siblings that do not belong to the group.  */
  while (child)
    {
      tree g_elt = group->element;
      if (TREE_CODE (g_elt) == RANGE_EXPR)
	{
	  if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0))
	      && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element))
	    break;
	}
      else
	gcc_unreachable ();

      child = child->sibling;
    }

  return child;
}
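
/* An illustrative note (editorial): for "int a[8]" with a group element
   whose RANGE_EXPR covers [2, 5], FOR_EACH_ACTUAL_CHILD visits only the
   parent's children whose constant index lies within [2, 5]; children
   with index 0, 1, 6 or 7 are skipped by next_child_for_group.  */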

/* Random access to the child of a parent is performed by hashing.
   This prevents quadratic behavior, and allows SRA to function
   reasonably on larger records.  */
static htab_t sra_map;

/* All structures are allocated out of the following obstack.  */
static struct obstack sra_obstack;

/* Debugging functions.  */
static void dump_sra_elt_name (FILE *, struct sra_elt *);
extern void debug_sra_elt_name (struct sra_elt *);

/* Forward declarations.  */
static tree generate_element_ref (struct sra_elt *);
static gimple_seq sra_build_assignment (tree dst, tree src);
static void mark_all_v_defs_seq (gimple_seq);
static void mark_all_v_defs_stmt (gimple);


/* Return true if DECL is an SRA candidate.  */

static bool
is_sra_candidate_decl (tree decl)
{
  return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
}

/* Return true if TYPE is a scalar type.  */

static bool
is_sra_scalar_type (tree type)
{
  enum tree_code code = TREE_CODE (type);
  return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
	  || code == FIXED_POINT_TYPE
	  || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
	  || code == POINTER_TYPE || code == OFFSET_TYPE
	  || code == REFERENCE_TYPE);
}

/* Return true if TYPE can be decomposed into a set of independent variables.

   Note that this doesn't imply that all elements of TYPE can be
   instantiated, just that, if we decide to break up the type into
   separate pieces, it can be done.  */

bool
sra_type_can_be_decomposed_p (tree type)
{
  unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
  tree t;

  /* Avoid searching the same type twice.  */
  if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
    return true;
  if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
    return false;

  /* The type must have a definite nonzero size.  */
  if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
      || integer_zerop (TYPE_SIZE (type)))
    goto fail;

  /* The type must be a non-union aggregate.  */
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	bool saw_one_field = false;

	for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
	  if (TREE_CODE (t) == FIELD_DECL)
	    {
	      /* Reject incorrectly represented bit fields.  */
	      if (DECL_BIT_FIELD (t)
		  && INTEGRAL_TYPE_P (TREE_TYPE (t))
		  && (tree_low_cst (DECL_SIZE (t), 1)
		      != TYPE_PRECISION (TREE_TYPE (t))))
		goto fail;

	      saw_one_field = true;
	    }

	/* Record types must have at least one field.  */
	if (!saw_one_field)
	  goto fail;
      }
      break;

    case ARRAY_TYPE:
      /* Array types must have a fixed lower and upper bound.  */
      t = TYPE_DOMAIN (type);
      if (t == NULL)
	goto fail;
      if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
	goto fail;
      if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
	goto fail;
      break;

    case COMPLEX_TYPE:
      break;

    default:
      goto fail;
    }

  bitmap_set_bit (sra_type_decomp_cache, cache+0);
  return true;

 fail:
  bitmap_set_bit (sra_type_decomp_cache, cache+1);
  return false;
}
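
/* Hedged examples of what the predicate above accepts and rejects
   (editorial illustration):

       struct s { int a; float b; };   decomposable: sized RECORD_TYPE
       int v[4];                       decomposable: constant array bounds
       _Complex double z;              decomposable: COMPLEX_TYPE
       union u { int i; float f; };    rejected: unions are not handled
       int w[n];                       rejected: no constant domain  */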

/* Returns true if the TYPE is one of the available va_list types.
   Otherwise it returns false.
   Note that for multiple calling conventions there can be more
   than just one va_list type present.  */

static bool
is_va_list_type (tree type)
{
  tree h;

  if (type == NULL_TREE)
    return false;
  h = targetm.canonical_va_list_type (type);
  if (h == NULL_TREE)
    return false;
  if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (h))
    return true;
  return false;
}

/* Return true if DECL can be decomposed into a set of independent
   (though not necessarily scalar) variables.  */

static bool
decl_can_be_decomposed_p (tree var)
{
  /* Early out for scalars.  */
  if (is_sra_scalar_type (TREE_TYPE (var)))
    return false;

  /* The variable must not be aliased.  */
  if (!is_gimple_non_addressable (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because it must live in memory\n");
	}
      return false;
    }

  /* The variable must not be volatile.  */
  if (TREE_THIS_VOLATILE (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because it is declared volatile\n");
	}
      return false;
    }

  /* We must be able to decompose the variable's type.  */
  if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because its type cannot be decomposed\n");
	}
      return false;
    }

  /* HACK: if we decompose a va_list_type_node before inlining, then we'll
     confuse tree-stdarg.c, and we won't be able to figure out which and
     how many arguments are accessed.  This really should be improved in
     tree-stdarg.c, as the decomposition is truly a win.  This could also
     be fixed if the stdarg pass ran early, but this can't be done until
     we have aliasing information early too.  See PR 30791.  */
  if (early_sra && is_va_list_type (TREE_TYPE (var)))
    return false;

  return true;
}

/* Return true if TYPE can be *completely* decomposed into scalars.  */

static bool
type_can_instantiate_all_elements (tree type)
{
  if (is_sra_scalar_type (type))
    return true;
  if (!sra_type_can_be_decomposed_p (type))
    return false;

  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
	tree f;

	if (bitmap_bit_p (sra_type_inst_cache, cache+0))
	  return true;
	if (bitmap_bit_p (sra_type_inst_cache, cache+1))
	  return false;

	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
	  if (TREE_CODE (f) == FIELD_DECL)
	    {
	      if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
		{
		  bitmap_set_bit (sra_type_inst_cache, cache+1);
		  return false;
		}
	    }

	bitmap_set_bit (sra_type_inst_cache, cache+0);
	return true;
      }

    case ARRAY_TYPE:
      return type_can_instantiate_all_elements (TREE_TYPE (type));

    case COMPLEX_TYPE:
      return true;

    default:
      gcc_unreachable ();
    }
}

/* Return true unless ELT or some sub-element cannot be scalarized.  */

static bool
can_completely_scalarize_p (struct sra_elt *elt)
{
  struct sra_elt *c;

  if (elt->cannot_scalarize)
    return false;

  for (c = elt->children; c; c = c->sibling)
    if (!can_completely_scalarize_p (c))
      return false;

  for (c = elt->groups; c; c = c->sibling)
    if (!can_completely_scalarize_p (c))
      return false;

  return true;
}


/* A simplified tree hashing algorithm that only handles the types of
   trees we expect to find in sra_elt->element.  */

static hashval_t
sra_hash_tree (tree t)
{
  hashval_t h;

  switch (TREE_CODE (t))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      h = DECL_UID (t);
      break;

    case INTEGER_CST:
      h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
      break;

    case RANGE_EXPR:
      h = iterative_hash_expr (TREE_OPERAND (t, 0), 0);
      h = iterative_hash_expr (TREE_OPERAND (t, 1), h);
      break;

    case FIELD_DECL:
      /* We can have types that are compatible, but have different member
	 lists, so we can't hash fields by ID.  Use offsets instead.  */
      h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
      h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
      break;

    case BIT_FIELD_REF:
      /* Don't take operand 0 into account, that's our parent.  */
      h = iterative_hash_expr (TREE_OPERAND (t, 1), 0);
      h = iterative_hash_expr (TREE_OPERAND (t, 2), h);
      break;

    default:
      gcc_unreachable ();
    }

  return h;
}

/* Hash function for struct sra_elt.  */

static hashval_t
sra_elt_hash (const void *x)
{
  const struct sra_elt *const e = (const struct sra_elt *) x;
  const struct sra_elt *p;
  hashval_t h;

  h = sra_hash_tree (e->element);

  /* Take into account everything except bitfield blocks back up the
     chain.  Given that chain lengths are rarely very long, this
     should be acceptable.  If we truly identify this as a performance
     problem, it should work to hash the pointer value
     "e->parent".  */
  for (p = e->parent; p ; p = p->parent)
    if (!p->in_bitfld_block)
      h = (h * 65521) ^ sra_hash_tree (p->element);

  return h;
}

/* Equality function for struct sra_elt.  */

static int
sra_elt_eq (const void *x, const void *y)
{
  const struct sra_elt *const a = (const struct sra_elt *) x;
  const struct sra_elt *const b = (const struct sra_elt *) y;
  tree ae, be;
  const struct sra_elt *ap = a->parent;
  const struct sra_elt *bp = b->parent;

  if (ap)
    while (ap->in_bitfld_block)
      ap = ap->parent;
  if (bp)
    while (bp->in_bitfld_block)
      bp = bp->parent;

  if (ap != bp)
    return false;

  ae = a->element;
  be = b->element;

  if (ae == be)
    return true;
  if (TREE_CODE (ae) != TREE_CODE (be))
    return false;

  switch (TREE_CODE (ae))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      /* These are all pointer-unique.  */
      return false;

    case INTEGER_CST:
      /* Integers are not pointer-unique, so compare their values.  */
      return tree_int_cst_equal (ae, be);

    case RANGE_EXPR:
      return
	tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0))
	&& tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1));

    case FIELD_DECL:
      /* Fields are unique within a record, but not between
	 compatible records.  */
      if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
	return false;
      return fields_compatible_p (ae, be);

    case BIT_FIELD_REF:
      return
	tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1))
	&& tree_int_cst_equal (TREE_OPERAND (ae, 2), TREE_OPERAND (be, 2));

    default:
      gcc_unreachable ();
    }
}

/* Create or return the SRA_ELT structure for CHILD in PARENT.  PARENT
   may be null, in which case CHILD must be a DECL.  */

static struct sra_elt *
lookup_element (struct sra_elt *parent, tree child, tree type,
		enum insert_option insert)
{
  struct sra_elt dummy;
  struct sra_elt **slot;
  struct sra_elt *elt;

  if (parent)
    dummy.parent = parent->is_group ? parent->parent : parent;
  else
    dummy.parent = NULL;
  dummy.element = child;

  slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
  if (!slot && insert == NO_INSERT)
    return NULL;

  elt = *slot;
  if (!elt && insert == INSERT)
    {
      *slot = elt = XOBNEW (&sra_obstack, struct sra_elt);
      memset (elt, 0, sizeof (*elt));

      elt->parent = parent;
      elt->element = child;
      elt->type = type;
      elt->is_scalar = is_sra_scalar_type (type);

      if (parent)
	{
	  if (IS_ELEMENT_FOR_GROUP (elt->element))
	    {
	      elt->is_group = true;
	      elt->sibling = parent->groups;
	      parent->groups = elt;
	    }
	  else
	    {
	      elt->sibling = parent->children;
	      parent->children = elt;
	    }
	}

      /* If this is a parameter, then if we want to scalarize, we have
	 one copy from the true function parameter.  Count it now.  */
      if (TREE_CODE (child) == PARM_DECL)
	{
	  elt->n_copies = 1;
	  bitmap_set_bit (needs_copy_in, DECL_UID (child));
	}
    }

  return elt;
}

/* Create or return the SRA_ELT structure for EXPR if the expression
   refers to a scalarizable variable.  */

static struct sra_elt *
maybe_lookup_element_for_expr (tree expr)
{
  struct sra_elt *elt;
  tree child;

  switch (TREE_CODE (expr))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      if (is_sra_candidate_decl (expr))
	return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
      return NULL;

    case ARRAY_REF:
      /* We can't scalarize variable array indices.  */
      if (in_array_bounds_p (expr))
	child = TREE_OPERAND (expr, 1);
      else
	return NULL;
      break;

    case ARRAY_RANGE_REF:
      /* We can't scalarize variable array indices.  */
      if (range_in_array_bounds_p (expr))
	{
	  tree domain = TYPE_DOMAIN (TREE_TYPE (expr));
	  child = build2 (RANGE_EXPR, integer_type_node,
			  TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain));
	}
      else
	return NULL;
      break;

    case COMPONENT_REF:
      {
	tree type = TREE_TYPE (TREE_OPERAND (expr, 0));
	/* Don't look through unions.  */
	if (TREE_CODE (type) != RECORD_TYPE)
	  return NULL;
	/* Neither through variable-sized records.  */
	if (TYPE_SIZE (type) == NULL_TREE
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return NULL;
	child = TREE_OPERAND (expr, 1);
      }
      break;

    case REALPART_EXPR:
      child = integer_zero_node;
      break;
    case IMAGPART_EXPR:
      child = integer_one_node;
      break;

    default:
      return NULL;
    }

  elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
  if (elt)
    return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
  return NULL;
}
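
/* An illustrative walk-through (editorial): for the reference "x.f[3]",
   where X is a candidate RECORD_TYPE decl, the recursion above bottoms
   out at X and creates a root element for it, then a child keyed by the
   FIELD_DECL of F, then a grandchild keyed by the INTEGER_CST 3.  A
   later occurrence of the same reference hashes to the same chain, so
   each distinct access path maps to exactly one sra_elt.  */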


/* Functions to walk just enough of the tree to see all scalarizable
   references, and categorize them.  */

/* A set of callbacks for phases 2 and 4.  They'll be invoked for the
   various kinds of references seen.  In all cases, *GSI is an iterator
   pointing to the statement being processed.  */
struct sra_walk_fns
{
  /* Invoked when ELT is required as a unit.  Note that ELT might refer to
     a leaf node, in which case this is a simple scalar reference.  *EXPR_P
     points to the location of the expression.  IS_OUTPUT is true if this
     is a left-hand-side reference.  USE_ALL is true if we saw something we
     couldn't quite identify and had to force the use of the entire object.  */
  void (*use) (struct sra_elt *elt, tree *expr_p,
	       gimple_stmt_iterator *gsi, bool is_output, bool use_all);

  /* Invoked when we have a copy between two scalarizable references.  */
  void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		gimple_stmt_iterator *gsi);

  /* Invoked when ELT is initialized from a constant.  VALUE may be NULL,
     in which case it should be treated as an empty CONSTRUCTOR.  */
  void (*init) (struct sra_elt *elt, tree value, gimple_stmt_iterator *gsi);

  /* Invoked when we have a copy between one scalarizable reference ELT
     and one non-scalarizable reference OTHER without side-effects.
     IS_OUTPUT is true if ELT is on the left-hand side.  */
  void (*ldst) (struct sra_elt *elt, tree other,
		gimple_stmt_iterator *gsi, bool is_output);

  /* True during phase 2, false during phase 4.  */
  /* ??? This is a hack.  */
  bool initial_scan;
};

#ifdef ENABLE_CHECKING
/* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */

static tree
sra_find_candidate_decl (tree *tp, int *walk_subtrees,
			 void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;
  enum tree_code code = TREE_CODE (t);

  if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
    {
      *walk_subtrees = 0;
      if (is_sra_candidate_decl (t))
	return t;
    }
  else if (TYPE_P (t))
    *walk_subtrees = 0;

  return NULL;
}
#endif

/* Walk most expressions looking for a scalarizable aggregate.
   If we find one, invoke FNS->USE.  */

static void
sra_walk_expr (tree *expr_p, gimple_stmt_iterator *gsi, bool is_output,
	       const struct sra_walk_fns *fns)
{
  tree expr = *expr_p;
  tree inner = expr;
  bool disable_scalarization = false;
  bool use_all_p = false;

  /* We're looking to collect a reference expression between EXPR and INNER,
     such that INNER is a scalarizable decl and all other nodes through EXPR
     are references that we can scalarize.  If we come across something that
     we can't scalarize, we reset EXPR.  This has the effect of making it
     appear that we're referring to the larger expression as a whole.  */

  while (1)
    switch (TREE_CODE (inner))
      {
      case VAR_DECL:
      case PARM_DECL:
      case RESULT_DECL:
	/* If there is a scalarizable decl at the bottom, then process it.  */
	if (is_sra_candidate_decl (inner))
	  {
	    struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
	    if (disable_scalarization)
	      elt->cannot_scalarize = true;
	    else
	      fns->use (elt, expr_p, gsi, is_output, use_all_p);
	  }
	return;

      case ARRAY_REF:
	/* Non-constant index means any member may be accessed.  Prevent the
	   expression from being scalarized.  If we were to treat this as a
	   reference to the whole array, we can wind up with a single dynamic
	   index reference inside a loop being overridden by several constant
	   index references during loop setup.  It's possible that this could
	   be avoided by using dynamic usage counts based on BB trip counts
	   (based on loop analysis or profiling), but that hardly seems worth
	   the effort.  */
	/* ??? Hack.  Figure out how to push this into the scan routines
	   without duplicating too much code.  */
	if (!in_array_bounds_p (inner))
	  {
	    disable_scalarization = true;
	    goto use_all;
	  }
	/* ??? Are we assured that non-constant bounds and stride will have
	   the same value everywhere?  I don't think Fortran will...  */
	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
	  goto use_all;
	inner = TREE_OPERAND (inner, 0);
	break;

      case ARRAY_RANGE_REF:
	if (!range_in_array_bounds_p (inner))
	  {
	    disable_scalarization = true;
	    goto use_all;
	  }
	/* ??? See above regarding non-constant bounds and stride.  */
	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
	  goto use_all;
	inner = TREE_OPERAND (inner, 0);
	break;

      case COMPONENT_REF:
	{
	  tree type = TREE_TYPE (TREE_OPERAND (inner, 0));
	  /* Don't look through unions.  */
	  if (TREE_CODE (type) != RECORD_TYPE)
	    goto use_all;
	  /* Neither through variable-sized records.  */
	  if (TYPE_SIZE (type) == NULL_TREE
	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	    goto use_all;
	  inner = TREE_OPERAND (inner, 0);
	}
	break;

      case REALPART_EXPR:
      case IMAGPART_EXPR:
	inner = TREE_OPERAND (inner, 0);
	break;

      case BIT_FIELD_REF:
	/* A bit field reference to a specific vector is scalarized, but a
	   reference used as an output needs to be marked as being on the
	   left hand side, so that when we scalarize the vector we can mark
	   the replacement variable as not renamable.  */
	if (is_output
	    && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE)
	  {
	    struct sra_elt *elt
	      = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
	    if (elt)
	      elt->is_vector_lhs = true;
	  }

	/* A bit field reference (access to *multiple* fields simultaneously)
	   is not currently scalarized.  Consider this an access to the full
	   outer element, to which walk_tree will bring us next.  */
	goto use_all;

      CASE_CONVERT:
	/* Similarly, a nop conversion explicitly wants to look at an object
	   in a type other than the one we've scalarized.  */
	goto use_all;

      case VIEW_CONVERT_EXPR:
	/* Likewise for a view conversion, but with an additional twist:
	   it can be on the LHS and, in this case, an access to the full
	   outer element would mean a killing def.  So we need to punt
	   if we don't already have a full access to the current element,
	   because we cannot pretend to have a killing def if we only
	   have a partial access at some level.  */
	if (is_output && !use_all_p && inner != expr)
	  disable_scalarization = true;
	goto use_all;

      case WITH_SIZE_EXPR:
	/* This is a transparent wrapper.  The entire inner expression really
	   is being used.  */
	goto use_all;

      use_all:
	expr_p = &TREE_OPERAND (inner, 0);
	inner = expr = *expr_p;
	use_all_p = true;
	break;

      default:
#ifdef ENABLE_CHECKING
	/* Validate that we're not missing any references.  */
	gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
#endif
	return;
      }
}

/* Walk the arguments of a GIMPLE_CALL looking for scalarizable aggregates.
   If we find one, invoke FNS->USE.  */

static void
sra_walk_gimple_call (gimple stmt, gimple_stmt_iterator *gsi,
		      const struct sra_walk_fns *fns)
{
  int i;
  int nargs = gimple_call_num_args (stmt);

  for (i = 0; i < nargs; i++)
    sra_walk_expr (gimple_call_arg_ptr (stmt, i), gsi, false, fns);

  if (gimple_call_lhs (stmt))
    sra_walk_expr (gimple_call_lhs_ptr (stmt), gsi, true, fns);
}

/* Walk the inputs and outputs of a GIMPLE_ASM looking for scalarizable
   aggregates.  If we find one, invoke FNS->USE.  */

static void
sra_walk_gimple_asm (gimple stmt, gimple_stmt_iterator *gsi,
		     const struct sra_walk_fns *fns)
{
  size_t i;
  for (i = 0; i < gimple_asm_ninputs (stmt); i++)
    sra_walk_expr (&TREE_VALUE (gimple_asm_input_op (stmt, i)), gsi, false, fns);
  for (i = 0; i < gimple_asm_noutputs (stmt); i++)
    sra_walk_expr (&TREE_VALUE (gimple_asm_output_op (stmt, i)), gsi, true, fns);
}

/* Walk a GIMPLE_ASSIGN and categorize the assignment appropriately.  */

static void
sra_walk_gimple_assign (gimple stmt, gimple_stmt_iterator *gsi,
			const struct sra_walk_fns *fns)
{
  struct sra_elt *lhs_elt = NULL, *rhs_elt = NULL;
  tree lhs, rhs;

  /* If there is more than one operand on the RHS, only walk the LHS.  */
  if (!gimple_assign_single_p (stmt))
    {
      sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns);
      return;
    }

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  lhs_elt = maybe_lookup_element_for_expr (lhs);
  rhs_elt = maybe_lookup_element_for_expr (rhs);

  /* If both sides are scalarizable, this is a COPY operation.  */
  if (lhs_elt && rhs_elt)
    {
      fns->copy (lhs_elt, rhs_elt, gsi);
      return;
    }

  /* If the RHS is scalarizable, handle it.  There are only two cases.  */
  if (rhs_elt)
    {
      if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
	fns->ldst (rhs_elt, lhs, gsi, false);
      else
	fns->use (rhs_elt, gimple_assign_rhs1_ptr (stmt), gsi, false, false);
    }

  /* If it isn't scalarizable, there may be scalarizable variables within, so
     check for a call or else walk the RHS to see if we need to do any
     copy-in operations.  We need to do it before the LHS is scalarized so
     that the statements get inserted in the proper place, before any
     copy-out operations.  */
  else
    sra_walk_expr (gimple_assign_rhs1_ptr (stmt), gsi, false, fns);

  /* Likewise, handle the LHS being scalarizable.  We have cases similar
     to those above, but also want to handle the RHS being constant.  */
  if (lhs_elt)
    {
      /* If this is an assignment from a constant, or constructor, then
	 we have access to all of the elements individually.  Invoke INIT.  */
      if (TREE_CODE (rhs) == COMPLEX_EXPR
	  || TREE_CODE (rhs) == COMPLEX_CST
	  || TREE_CODE (rhs) == CONSTRUCTOR)
	fns->init (lhs_elt, rhs, gsi);

      /* If this is an assignment from read-only memory, treat this as if
	 we'd been passed the constructor directly.  Invoke INIT.  */
      else if (TREE_CODE (rhs) == VAR_DECL
	       && TREE_STATIC (rhs)
	       && !DECL_EXTERNAL (rhs)
	       && TREE_READONLY (rhs)
	       && targetm.binds_local_p (rhs))
	fns->init (lhs_elt, DECL_INITIAL (rhs), gsi);

      /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
	 The lvalue requirement prevents us from trying to directly scalarize
	 the result of a function call, which would result in trying to call
	 the function multiple times, and other evil things.  */
      else if (!lhs_elt->is_scalar
	       && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
	fns->ldst (lhs_elt, rhs, gsi, true);

      /* Otherwise we're being used in some context that requires the
	 aggregate to be seen as a whole.  Invoke USE.  */
      else
	fns->use (lhs_elt, gimple_assign_lhs_ptr (stmt), gsi, true, false);
    }

  /* Similarly to above, LHS_ELT being null only means that the LHS as a
     whole is not a scalarizable reference.  There may be occurrences of
     scalarizable variables within, which implies a USE.  */
  else
    sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns);
}
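
/* Hedged examples of how the walker above classifies single assignments
   (editorial; S and T are scalarizable aggregates, X is not):

       s = t;         both sides scalarizable        -> fns->copy
       s = { ... };   CONSTRUCTOR or constant RHS    -> fns->init
       s = x;         aggregate load into S          -> fns->ldst, is_output true
       x = s;         aggregate store out of S       -> fns->ldst, is_output false
       s = foo ();    GIMPLE_CALL with S as its LHS  -> fns->use of S as a whole
		      (via sra_walk_gimple_call above)  */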

/* Entry point to the walk functions.  Search the entire function,
   invoking the callbacks in FNS on each of the references to
   scalarizable variables.  */

static void
sra_walk_function (const struct sra_walk_fns *fns)
{
  basic_block bb;
  gimple_stmt_iterator si, ni;

  /* ??? Phase 4 could derive some benefit from walking the function in
     dominator tree order.  */

  FOR_EACH_BB (bb)
    for (si = gsi_start_bb (bb); !gsi_end_p (si); si = ni)
      {
	gimple stmt;

	stmt = gsi_stmt (si);

	ni = si;
	gsi_next (&ni);

	/* If the statement has no virtual operands, then it doesn't
	   make any structure references that we care about.  */
	if (gimple_aliases_computed_p (cfun)
	    && ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
	  continue;

	switch (gimple_code (stmt))
	  {
	  case GIMPLE_RETURN:
	    /* If we have "return <retval>" then the return value is
	       already exposed for our pleasure.  Walk it as a USE to
	       force all the components back in place for the return.  */
	    if (gimple_return_retval (stmt) == NULL_TREE)
	      ;
	    else
	      sra_walk_expr (gimple_return_retval_ptr (stmt), &si, false,
			     fns);
	    break;

	  case GIMPLE_ASSIGN:
	    sra_walk_gimple_assign (stmt, &si, fns);
	    break;
	  case GIMPLE_CALL:
	    sra_walk_gimple_call (stmt, &si, fns);
	    break;
	  case GIMPLE_ASM:
	    sra_walk_gimple_asm (stmt, &si, fns);
	    break;

	  default:
	    break;
	  }
      }
}

/* Phase One: Scan all referenced variables in the program looking for
   structures that could be decomposed.  */

static bool
find_candidates_for_sra (void)
{
  bool any_set = false;
  tree var;
  referenced_var_iterator rvi;

  FOR_EACH_REFERENCED_VAR (var, rvi)
    {
      if (decl_can_be_decomposed_p (var))
	{
	  bitmap_set_bit (sra_candidates, DECL_UID (var));
	  any_set = true;
	}
    }

  return any_set;
}


/* Phase Two: Scan all references to scalarizable variables.  Count the
   number of times they are used or copied respectively.  */

/* Callbacks to fill in SRA_WALK_FNS.  Everything but USE is
   considered a copy, because we can decompose the reference such that
   the sub-elements needn't be contiguous.  */

static void
scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
	  gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
	  bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
{
  elt->n_uses += 1;
}

static void
scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
	   gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
{
  lhs_elt->n_copies += 1;
  rhs_elt->n_copies += 1;
}

static void
scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
	   gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
{
  lhs_elt->n_copies += 1;
}

static void
scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
	   gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
	   bool is_output ATTRIBUTE_UNUSED)
{
  elt->n_copies += 1;
}

/* Dump the values we collected during the scanning phase.  */

static void
scan_dump (struct sra_elt *elt)
{
  struct sra_elt *c;

  dump_sra_elt_name (dump_file, elt);
  fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);

  for (c = elt->children; c ; c = c->sibling)
    scan_dump (c);

  for (c = elt->groups; c ; c = c->sibling)
    scan_dump (c);
}

/* Entry point to phase 2.  Scan the entire function, building up
   scalarization data structures, recording copies and uses.  */

static void
scan_function (void)
{
  static const struct sra_walk_fns fns = {
    scan_use, scan_copy, scan_init, scan_ldst, true
  };
  bitmap_iterator bi;

  sra_walk_function (&fns);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fputs ("\nScan results:\n", dump_file);
      EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
	{
	  tree var = referenced_var (i);
	  struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
	  if (elt)
	    scan_dump (elt);
	}
      fputc ('\n', dump_file);
    }
}

/* Phase Three: Make decisions about which variables to scalarize, if any.
   All elements to be scalarized have replacement variables made for them.  */

/* A subroutine of build_element_name.  Recursively build the element
   name on the obstack.  */

static void
build_element_name_1 (struct sra_elt *elt)
{
  tree t;
  char buffer[32];

  if (elt->parent)
    {
      build_element_name_1 (elt->parent);
      obstack_1grow (&sra_obstack, '$');

      if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
	{
	  if (elt->element == integer_zero_node)
	    obstack_grow (&sra_obstack, "real", 4);
	  else
	    obstack_grow (&sra_obstack, "imag", 4);
	  return;
	}
    }

  t = elt->element;
  if (TREE_CODE (t) == INTEGER_CST)
    {
      /* ??? Eh.  Don't bother doing double-wide printing.  */
      sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
      obstack_grow (&sra_obstack, buffer, strlen (buffer));
    }
  else if (TREE_CODE (t) == BIT_FIELD_REF)
    {
      sprintf (buffer, "B" HOST_WIDE_INT_PRINT_DEC,
	       tree_low_cst (TREE_OPERAND (t, 2), 1));
      obstack_grow (&sra_obstack, buffer, strlen (buffer));
      sprintf (buffer, "F" HOST_WIDE_INT_PRINT_DEC,
	       tree_low_cst (TREE_OPERAND (t, 1), 1));
      obstack_grow (&sra_obstack, buffer, strlen (buffer));
    }
  else
    {
      tree name = DECL_NAME (t);
      if (name)
	obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
		      IDENTIFIER_LENGTH (name));
      else
	{
	  sprintf (buffer, "D%u", DECL_UID (t));
	  obstack_grow (&sra_obstack, buffer, strlen (buffer));
	}
    }
}

/* Construct a pretty variable name for an element's replacement variable.
   The name is built on the obstack.  */

static char *
build_element_name (struct sra_elt *elt)
{
  build_element_name_1 (elt);
  obstack_1grow (&sra_obstack, '\0');
  return XOBFINISH (&sra_obstack, char *);
}
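
/* Hedged naming examples (editorial): for "struct { _Complex float c; } a",
   the real part of A.C gets the name "a$c$real"; an array element such as
   a[3] contributes "$3"; a coalesced bit-field block is spelled with its
   bit position and width, e.g. "a$B0F16" for bits [0, 16); and a decl
   without a source name falls back to "D<uid>".  */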

/* Instantiate an element as an independent variable.  */

static void
instantiate_element (struct sra_elt *elt)
{
  struct sra_elt *base_elt;
  tree var, base;
  bool nowarn = TREE_NO_WARNING (elt->element);

  for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
    if (!nowarn)
      nowarn = TREE_NO_WARNING (base_elt->parent->element);
  base = base_elt->element;

  elt->replacement = var = make_rename_temp (elt->type, "SR");

  if (DECL_P (elt->element)
      && !tree_int_cst_equal (DECL_SIZE (var), DECL_SIZE (elt->element)))
    {
      DECL_SIZE (var) = DECL_SIZE (elt->element);
      DECL_SIZE_UNIT (var) = DECL_SIZE_UNIT (elt->element);

      elt->in_bitfld_block = 1;
      elt->replacement = fold_build3 (BIT_FIELD_REF, elt->type, var,
				      DECL_SIZE (var),
				      BYTES_BIG_ENDIAN
				      ? size_binop (MINUS_EXPR,
						    TYPE_SIZE (elt->type),
						    DECL_SIZE (var))
				      : bitsize_int (0));
    }

  /* For vectors that are used on the left hand side with BIT_FIELD_REF,
     the replacement is not a gimple register.  */
  if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs)
    DECL_GIMPLE_REG_P (var) = 0;

  DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
  DECL_ARTIFICIAL (var) = 1;

  if (TREE_THIS_VOLATILE (elt->type))
    {
      TREE_THIS_VOLATILE (var) = 1;
      TREE_SIDE_EFFECTS (var) = 1;
    }

  if (DECL_NAME (base) && !DECL_IGNORED_P (base))
    {
      char *pretty_name = build_element_name (elt);
      DECL_NAME (var) = get_identifier (pretty_name);
      obstack_free (&sra_obstack, pretty_name);

      SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
      DECL_DEBUG_EXPR_IS_FROM (var) = 1;

      DECL_IGNORED_P (var) = 0;
      TREE_NO_WARNING (var) = nowarn;
    }
  else
    {
      DECL_IGNORED_P (var) = 1;
      /* ??? We can't generate any warning that would be meaningful.  */
      TREE_NO_WARNING (var) = 1;
    }

  /* Zero-initialize bit-field scalarization variables, to avoid
     triggering undefined behavior.  */
  if (TREE_CODE (elt->element) == BIT_FIELD_REF
      || (var != elt->replacement
	  && TREE_CODE (elt->replacement) == BIT_FIELD_REF))
    {
      gimple_seq init = sra_build_assignment (var,
					      fold_convert (TREE_TYPE (var),
							    integer_zero_node));
      insert_edge_copies_seq (init, ENTRY_BLOCK_PTR);
      mark_all_v_defs_seq (init);
    }

  if (dump_file)
    {
      fputs (" ", dump_file);
      dump_sra_elt_name (dump_file, elt);
      fputs (" -> ", dump_file);
      print_generic_expr (dump_file, var, dump_flags);
      fputc ('\n', dump_file);
    }
}

/* Make one pass across an element tree deciding whether or not it's
   profitable to instantiate individual leaf scalars.

   PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
   fields all the way up the tree.  */

static void
decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
			unsigned int parent_copies)
{
  if (dump_file && !elt->parent)
    {
      fputs ("Initial instantiation for ", dump_file);
      dump_sra_elt_name (dump_file, elt);
      fputc ('\n', dump_file);
    }

  if (elt->cannot_scalarize)
    return;

  if (elt->is_scalar)
    {
      /* The decision is simple: instantiate if we're used more frequently
	 than the parent needs to be seen as a complete unit.  */
      if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
	instantiate_element (elt);
    }
  else
    {
      struct sra_elt *c, *group;
      unsigned int this_uses = elt->n_uses + parent_uses;
      unsigned int this_copies = elt->n_copies + parent_copies;

      /* Consider groups of sub-elements as weighing in favour of
	 instantiation whatever their size.  */
      for (group = elt->groups; group ; group = group->sibling)
	FOR_EACH_ACTUAL_CHILD (c, group)
	  {
	    c->n_uses += group->n_uses;
	    c->n_copies += group->n_copies;
	  }

      for (c = elt->children; c ; c = c->sibling)
	decide_instantiation_1 (c, this_uses, this_copies);
    }
}
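
/* A hedged numeric example of the heuristic above (editorial): suppose
   leaf A.X has n_uses = 2 and n_copies = 2, while A as a whole is used
   3 times (parent_uses = 3) and never copied (parent_copies = 0).  Then
   2 + 2 + 0 > 3, so A.X is instantiated.  Were A used as a unit 5 times
   instead, 4 > 5 would fail and A.X would stay in the aggregate.  */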

/* Compute the size and number of all instantiated elements below ELT.
   We will only care about this if the size of the complete structure
   fits in a HOST_WIDE_INT, so we don't have to worry about overflow.  */

static unsigned int
sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
{
  if (elt->replacement)
    {
      *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
      return 1;
    }
  else
    {
      struct sra_elt *c;
      unsigned int count = 0;

      for (c = elt->children; c ; c = c->sibling)
	count += sum_instantiated_sizes (c, sizep);

      return count;
    }
}

/* Instantiate fields in ELT->TYPE that are not currently present as
   children of ELT.  */

static void instantiate_missing_elements (struct sra_elt *elt);

static struct sra_elt *
instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
{
  struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
  if (sub->is_scalar)
    {
      if (sub->replacement == NULL)
	instantiate_element (sub);
    }
  else
    instantiate_missing_elements (sub);
  return sub;
}
/* Obtain the canonical type for field F of ELEMENT.  */

static tree
canon_type_for_field (tree f, tree element)
{
  tree field_type = TREE_TYPE (f);

  /* canonicalize_component_ref() unwidens some bit-field types (not
     marked as DECL_BIT_FIELD in C++), so we must do the same, lest we
     introduce type mismatches.  */
  if (INTEGRAL_TYPE_P (field_type)
      && DECL_MODE (f) != TYPE_MODE (field_type))
    field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
						   field_type,
						   element,
						   f, NULL_TREE),
					   NULL_TREE));

  return field_type;
}

/* Look for adjacent fields of ELT starting at F that we'd like to
   scalarize as a single variable.  Return the last field of the
   group.  */

static tree
try_instantiate_multiple_fields (struct sra_elt *elt, tree f)
{
  int count;
  unsigned HOST_WIDE_INT align, bit, size, alchk;
  enum machine_mode mode;
  tree first = f, prev;
  tree type, var;
  struct sra_elt *block;

  /* Pointer fields are typically best handled as standalone entities.  */
  if (POINTER_TYPE_P (TREE_TYPE (f)))
    return f;

  if (!is_sra_scalar_type (TREE_TYPE (f))
      || !host_integerp (DECL_FIELD_OFFSET (f), 1)
      || !host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
      || !host_integerp (DECL_SIZE (f), 1)
      || lookup_element (elt, f, NULL, NO_INSERT))
    return f;

  block = elt;

  /* For complex and array objects, there are going to be integer
     literals as child elements.  In this case, we can't just take the
     alignment and mode of the decl, so we instead rely on the element
     type.

     ??? We could try to infer additional alignment from the full
     object declaration and the location of the sub-elements we're
     accessing.  */
  for (count = 0; !DECL_P (block->element); count++)
    block = block->parent;

  align = DECL_ALIGN (block->element);
  alchk = GET_MODE_BITSIZE (DECL_MODE (block->element));

  if (count)
    {
      type = TREE_TYPE (block->element);
      while (count--)
	type = TREE_TYPE (type);

      align = TYPE_ALIGN (type);
      alchk = GET_MODE_BITSIZE (TYPE_MODE (type));
    }

  if (align < alchk)
    align = alchk;

  /* Coalescing wider fields is probably pointless and
     inefficient.  */
  if (align > BITS_PER_WORD)
    align = BITS_PER_WORD;

  bit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
    + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
  size = tree_low_cst (DECL_SIZE (f), 1);

  alchk = align - 1;
  alchk = ~alchk;

  if ((bit & alchk) != ((bit + size - 1) & alchk))
    return f;

  /* Find adjacent fields in the same alignment word.  */

  for (prev = f, f = TREE_CHAIN (f);
       f && TREE_CODE (f) == FIELD_DECL
	 && is_sra_scalar_type (TREE_TYPE (f))
	 && host_integerp (DECL_FIELD_OFFSET (f), 1)
	 && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
	 && host_integerp (DECL_SIZE (f), 1)
	 && !lookup_element (elt, f, NULL, NO_INSERT);
       prev = f, f = TREE_CHAIN (f))
    {
      unsigned HOST_WIDE_INT nbit, nsize;

      nbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
	+ tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
      nsize = tree_low_cst (DECL_SIZE (f), 1);

      if (bit + size == nbit)
	{
	  if ((bit & alchk) != ((nbit + nsize - 1) & alchk))
	    {
	      /* If we're at an alignment boundary, don't bother
		 growing alignment such that we can include this next
		 field.  */
	      if ((nbit & alchk)
		  || GET_MODE_BITSIZE (DECL_MODE (f)) <= align)
		break;

	      align = GET_MODE_BITSIZE (DECL_MODE (f));
	      alchk = align - 1;
	      alchk = ~alchk;

	      if ((bit & alchk) != ((nbit + nsize - 1) & alchk))
		break;
	    }
	  size += nsize;
	}
      else if (nbit + nsize == bit)
	{
	  if ((nbit & alchk) != ((bit + size - 1) & alchk))
	    {
	      if ((bit & alchk)
		  || GET_MODE_BITSIZE (DECL_MODE (f)) <= align)
		break;

	      align = GET_MODE_BITSIZE (DECL_MODE (f));
	      alchk = align - 1;
	      alchk = ~alchk;

	      if ((nbit & alchk) != ((bit + size - 1) & alchk))
		break;
	    }
	  bit = nbit;
	  size += nsize;
	}
      else
	break;
    }

  f = prev;

  if (f == first)
    return f;

  gcc_assert ((bit & alchk) == ((bit + size - 1) & alchk));

  /* Try to widen the bit range so as to cover padding bits as well.  */

  if ((bit & ~alchk) || size != align)
    {
      unsigned HOST_WIDE_INT mbit = bit & alchk;
      unsigned HOST_WIDE_INT msize = align;

      for (f = TYPE_FIELDS (elt->type);
	   f; f = TREE_CHAIN (f))
	{
	  unsigned HOST_WIDE_INT fbit, fsize;

	  /* Skip the fields from first to prev.  */
	  if (f == first)
	    {
	      f = prev;
	      continue;
	    }

	  if (!(TREE_CODE (f) == FIELD_DECL
		&& host_integerp (DECL_FIELD_OFFSET (f), 1)
		&& host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)))
	    continue;

	  fbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
	    + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);

	  /* If we're past the selected word, we're fine.  */
	  if ((bit & alchk) < (fbit & alchk))
	    continue;

	  if (host_integerp (DECL_SIZE (f), 1))
	    fsize = tree_low_cst (DECL_SIZE (f), 1);
	  else
	    /* Assume a variable-sized field takes up all space till
	       the end of the word.  ??? Endianness issues?  */
	    fsize = align - (fbit & alchk);

	  if ((fbit & alchk) < (bit & alchk))
	    {
	      /* A large field might start at a previous word and
		 extend into the selected word.  Exclude those
		 bits.  ??? Endianness issues?  */
	      HOST_WIDE_INT diff = fbit + fsize - mbit;

	      if (diff <= 0)
		continue;

	      mbit += diff;
	      msize -= diff;
	    }
	  else
	    {
	      /* Non-overlapping, great.  */
	      if (fbit + fsize <= mbit
		  || mbit + msize <= fbit)
		continue;

	      if (fbit <= mbit)
		{
		  unsigned HOST_WIDE_INT diff = fbit + fsize - mbit;
		  mbit += diff;
		  msize -= diff;
		}
	      else if (fbit > mbit)
		msize -= (mbit + msize - fbit);
	      else
		gcc_unreachable ();
	    }
	}

      bit = mbit;
      size = msize;
    }

  /* Now we know the bit range we're interested in.  Find the smallest
     machine mode we can use to access it.  */

  for (mode = smallest_mode_for_size (size, MODE_INT);
       ;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      gcc_assert (mode != VOIDmode);

      alchk = GET_MODE_PRECISION (mode) - 1;
      alchk = ~alchk;

      if ((bit & alchk) == ((bit + size - 1) & alchk))
	break;
    }

  gcc_assert (~alchk < align);

1699 /* Create the field group as a single variable. */ | |
1700 | |
1701 /* We used to create a type for the mode above, but the size | |
1702 turns out not to be the mode size. As we need a matching type | |
1703 to build a BIT_FIELD_REF, use a nonstandard integer type as | |
1704 fallback. */ | |
1705 type = lang_hooks.types.type_for_size (size, 1); | |
1706 if (!type || TYPE_PRECISION (type) != size) | |
1707 type = build_nonstandard_integer_type (size, 1); | |
1708 gcc_assert (type); | |
1709 var = build3 (BIT_FIELD_REF, type, NULL_TREE, | |
1710 bitsize_int (size), bitsize_int (bit)); | |
1711 | |
1712 block = instantiate_missing_elements_1 (elt, var, type); | |
1713 gcc_assert (block && block->is_scalar); | |
1714 | |
1715 var = block->replacement; | |
1716 block->in_bitfld_block = 2; | |
1717 | |
1718 /* Add the member fields to the group, such that they access | |
1719 portions of the group variable. */ | |
1720 | |
1721 for (f = first; f != TREE_CHAIN (prev); f = TREE_CHAIN (f)) | |
1722 { | |
1723 tree field_type = canon_type_for_field (f, elt->element); | |
1724 struct sra_elt *fld = lookup_element (block, f, field_type, INSERT); | |
1725 | |
1726 gcc_assert (fld && fld->is_scalar && !fld->replacement); | |
1727 | |
1728 fld->replacement = fold_build3 (BIT_FIELD_REF, field_type, var, | |
1729 bitsize_int (TYPE_PRECISION (field_type)), | |
1730 bitsize_int | |
1731 ((TREE_INT_CST_LOW (DECL_FIELD_OFFSET (f)) | |
1732 * BITS_PER_UNIT | |
1733 + (TREE_INT_CST_LOW | |
1734 (DECL_FIELD_BIT_OFFSET (f))) | |
1735 - (TREE_INT_CST_LOW | |
1736 (TREE_OPERAND (block->element, 2)))) | |
1737 & ~alchk)); | |
1738 fld->in_bitfld_block = 1; | |
1739 } | |
1740 | |
1741 return prev; | |
1742 } | |
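/* A hedged illustration (hypothetical struct, not taken from the
   sources): given

     struct S { int a : 3; int b : 5; int c : 8; };

   the grouping above may replace the run of bit-fields with a single
   16-bit scalar SR.G, with the group element accessed as
   BIT_FIELD_REF <SR.G, 16, 0> and the members rewritten as
   BIT_FIELD_REF <SR.G, 3, 0>, BIT_FIELD_REF <SR.G, 5, 3> and
   BIT_FIELD_REF <SR.G, 8, 8> (little-endian bit positions).  */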
1743 | |
1744 static void | |
1745 instantiate_missing_elements (struct sra_elt *elt) | |
1746 { | |
1747 tree type = elt->type; | |
1748 | |
1749 switch (TREE_CODE (type)) | |
1750 { | |
1751 case RECORD_TYPE: | |
1752 { | |
1753 tree f; | |
1754 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) | |
1755 if (TREE_CODE (f) == FIELD_DECL) | |
1756 { | |
1757 tree last = try_instantiate_multiple_fields (elt, f); | |
1758 | |
1759 if (last != f) | |
1760 { | |
1761 f = last; | |
1762 continue; | |
1763 } | |
1764 | |
1765 instantiate_missing_elements_1 (elt, f, | |
1766 canon_type_for_field | |
1767 (f, elt->element)); | |
1768 } | |
1769 break; | |
1770 } | |
1771 | |
1772 case ARRAY_TYPE: | |
1773 { | |
1774 tree i, max, subtype; | |
1775 | |
1776 i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); | |
1777 max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); | |
1778 subtype = TREE_TYPE (type); | |
1779 | |
1780 while (1) | |
1781 { | |
1782 instantiate_missing_elements_1 (elt, i, subtype); | |
1783 if (tree_int_cst_equal (i, max)) | |
1784 break; | |
1785 i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); | |
1786 } | |
1787 | |
1788 break; | |
1789 } | |
1790 | |
1791 case COMPLEX_TYPE: | |
1792 type = TREE_TYPE (type); | |
1793 instantiate_missing_elements_1 (elt, integer_zero_node, type); | |
1794 instantiate_missing_elements_1 (elt, integer_one_node, type); | |
1795 break; | |
1796 | |
1797 default: | |
1798 gcc_unreachable (); | |
1799 } | |
1800 } | |
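/* For instance (illustrative only): for a variable of type
   struct { int i; _Complex float c; } the switch above walks the
   record fields and, via instantiate_missing_elements_1, ends up
   with scalar replacements for .i, __real .c and __imag .c.  */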
1801 | |
1802 /* Return true if there is exactly one non-aggregate field in the record | |
1803 type TYPE. Return false otherwise. */ | |
1804 | |
1805 static bool | |
1806 single_scalar_field_in_record_p (tree type) | |
1807 { | |
1808 int num_fields = 0; | |
1809 tree field; | |
1810 if (TREE_CODE (type) != RECORD_TYPE) | |
1811 return false; | |
1812 | |
1813 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
1814 if (TREE_CODE (field) == FIELD_DECL) | |
1815 { | |
1816 num_fields++; | |
1817 | |
1818 if (num_fields == 2) | |
1819 return false; | |
1820 | |
1821 if (AGGREGATE_TYPE_P (TREE_TYPE (field))) | |
1822 return false; | |
1823 } | |
1824 | |
1825 return true; | |
1826 } | |
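/* For example (illustrative): the predicate above holds for
   struct { float x; } but not for struct { float x; float y; }
   (two fields) nor for struct { struct { int i; } inner; }
   (an aggregate field).  */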
1827 | |
1828 /* Make one pass across an element tree deciding whether to perform block | |
1829 or element copies. If we decide on element copies, instantiate all | |
1830 elements. Return true if there are any instantiated sub-elements. */ | |
1831 | |
1832 static bool | |
1833 decide_block_copy (struct sra_elt *elt) | |
1834 { | |
1835 struct sra_elt *c; | |
1836 bool any_inst; | |
1837 | |
1838 /* We shouldn't be invoked on groups of sub-elements as they must | |
1839 behave like their parent as far as block copy is concerned. */ | |
1840 gcc_assert (!elt->is_group); | |
1841 | |
1842 /* If scalarization is disabled, respect it. */ | |
1843 if (elt->cannot_scalarize) | |
1844 { | |
1845 elt->use_block_copy = 1; | |
1846 | |
1847 if (dump_file) | |
1848 { | |
1849 fputs ("Scalarization disabled for ", dump_file); | |
1850 dump_sra_elt_name (dump_file, elt); | |
1851 fputc ('\n', dump_file); | |
1852 } | |
1853 | |
1854 /* Disable scalarization of sub-elements. */ | |
1855 for (c = elt->children; c; c = c->sibling) | |
1856 { | |
1857 c->cannot_scalarize = 1; | |
1858 decide_block_copy (c); | |
1859 } | |
1860 | |
1861 /* Groups behave like their parent. */ | |
1862 for (c = elt->groups; c; c = c->sibling) | |
1863 { | |
1864 c->cannot_scalarize = 1; | |
1865 c->use_block_copy = 1; | |
1866 } | |
1867 | |
1868 return false; | |
1869 } | |
1870 | |
1871 /* There is nothing to decide if we have no uses and no groups. */ | |
1872 if (elt->n_uses == 0 && elt->n_copies == 0 && elt->groups == NULL) | |
1873 ; | |
1874 | |
1875 else if (!elt->is_scalar) | |
1876 { | |
1877 tree size_tree = TYPE_SIZE_UNIT (elt->type); | |
1878 bool use_block_copy = true; | |
1879 | |
1880 /* Tradeoffs for COMPLEX types pretty much always make it better | |
1881 to go ahead and split the components. */ | |
1882 if (TREE_CODE (elt->type) == COMPLEX_TYPE) | |
1883 use_block_copy = false; | |
1884 | |
1885 /* Don't bother trying to figure out the rest if the structure is | |
1886 so large we can't do easy arithmetic. This also forces block | |
1887 copies for variable sized structures. */ | |
1888 else if (host_integerp (size_tree, 1)) | |
1889 { | |
1890 unsigned HOST_WIDE_INT full_size, inst_size = 0; | |
1891 unsigned int max_size, max_count, inst_count, full_count; | |
1892 | |
1893 /* If the sra-max-structure-size parameter is 0, then the | |
1894 user has not overridden the parameter and we can choose a | |
1895 sensible default. */ | |
1896 max_size = SRA_MAX_STRUCTURE_SIZE | |
1897 ? SRA_MAX_STRUCTURE_SIZE | |
1898 : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD; | |
1899 max_count = SRA_MAX_STRUCTURE_COUNT | |
1900 ? SRA_MAX_STRUCTURE_COUNT | |
1901 : MOVE_RATIO (optimize_function_for_speed_p (cfun)); | |
1902 | |
1903 full_size = tree_low_cst (size_tree, 1); | |
1904 full_count = count_type_elements (elt->type, false); | |
1905 inst_count = sum_instantiated_sizes (elt, &inst_size); | |
1906 | |
1907 /* If there is only one scalar field in the record, don't block copy. */ | |
1908 if (single_scalar_field_in_record_p (elt->type)) | |
1909 use_block_copy = false; | |
1910 | |
1911 /* ??? What to do here. If there are two fields, and we've only | |
1912 instantiated one, then instantiating the other is clearly a win. | |
1913 If there are a large number of fields then the size of the copy | |
1914 is much more of a factor. */ | |
1915 | |
1916 /* If the structure is small, and we've made copies, go ahead | |
1917 and instantiate, hoping that the copies will go away. */ | |
1918 if (full_size <= max_size | |
1919 && (full_count - inst_count) <= max_count | |
1920 && elt->n_copies > elt->n_uses) | |
1921 use_block_copy = false; | |
1922 else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO | |
1923 && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO) | |
1924 use_block_copy = false; | |
1925 | |
1926 /* In order to avoid block copy, we have to be able to instantiate | |
1927 all elements of the type. See if this is possible. */ | |
1928 if (!use_block_copy | |
1929 && (!can_completely_scalarize_p (elt) | |
1930 || !type_can_instantiate_all_elements (elt->type))) | |
1931 use_block_copy = true; | |
1932 } | |
1933 | |
1934 elt->use_block_copy = use_block_copy; | |
1935 | |
1936 /* Groups behave like their parent. */ | |
1937 for (c = elt->groups; c; c = c->sibling) | |
1938 c->use_block_copy = use_block_copy; | |
1939 | |
1940 if (dump_file) | |
1941 { | |
1942 fprintf (dump_file, "Using %s for ", | |
1943 use_block_copy ? "block-copy" : "element-copy"); | |
1944 dump_sra_elt_name (dump_file, elt); | |
1945 fputc ('\n', dump_file); | |
1946 } | |
1947 | |
1948 if (!use_block_copy) | |
1949 { | |
1950 instantiate_missing_elements (elt); | |
1951 return true; | |
1952 } | |
1953 } | |
1954 | |
1955 any_inst = elt->replacement != NULL; | |
1956 | |
1957 for (c = elt->children; c ; c = c->sibling) | |
1958 any_inst |= decide_block_copy (c); | |
1959 | |
1960 return any_inst; | |
1961 } | |
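/* A rough worked example of the heuristics above, assuming default
   parameters and a MOVE_RATIO of 4 with 4-byte words (so max_size is
   16 bytes and max_count is 4): a two-int struct that is copied three
   times but used whole only once passes the size and count tests with
   n_copies > n_uses and is decomposed into element copies, whereas a
   1 KB aggregate fails the size test and keeps block copies.  */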
1962 | |
1963 /* Entry point to phase 3. Instantiate scalar replacement variables. */ | |
1964 | |
1965 static void | |
1966 decide_instantiations (void) | |
1967 { | |
1968 unsigned int i; | |
1969 bool cleared_any; | |
1970 bitmap_head done_head; | |
1971 bitmap_iterator bi; | |
1972 | |
1973 /* We cannot clear bits from a bitmap we're iterating over, | |
1974 so save up all the bits to clear until the end. */ | |
1975 bitmap_initialize (&done_head, &bitmap_default_obstack); | |
1976 cleared_any = false; | |
1977 | |
1978 EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) | |
1979 { | |
1980 tree var = referenced_var (i); | |
1981 struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); | |
1982 if (elt) | |
1983 { | |
1984 decide_instantiation_1 (elt, 0, 0); | |
1985 if (!decide_block_copy (elt)) | |
1986 elt = NULL; | |
1987 } | |
1988 if (!elt) | |
1989 { | |
1990 bitmap_set_bit (&done_head, i); | |
1991 cleared_any = true; | |
1992 } | |
1993 } | |
1994 | |
1995 if (cleared_any) | |
1996 { | |
1997 bitmap_and_compl_into (sra_candidates, &done_head); | |
1998 bitmap_and_compl_into (needs_copy_in, &done_head); | |
1999 } | |
2000 bitmap_clear (&done_head); | |
2001 | |
2002 mark_set_for_renaming (sra_candidates); | |
2003 | |
2004 if (dump_file) | |
2005 fputc ('\n', dump_file); | |
2006 } | |
2007 | |
2008 | |
2009 /* Phase Four: Update the function to match the replacements created. */ | |
2010 | |
2011 /* Mark all the variables in the VDEF/VUSE operands of STMT for | |
2012 renaming. This becomes necessary when we modify all of a | |
2013 non-scalar. */ | |
2014 | |
2015 static void | |
2016 mark_all_v_defs_stmt (gimple stmt) | |
2017 { | |
2018 tree sym; | |
2019 ssa_op_iter iter; | |
2020 | |
2021 update_stmt_if_modified (stmt); | |
2022 | |
2023 FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS) | |
2024 { | |
2025 if (TREE_CODE (sym) == SSA_NAME) | |
2026 sym = SSA_NAME_VAR (sym); | |
2027 mark_sym_for_renaming (sym); | |
2028 } | |
2029 } | |
2030 | |
2031 | |
2032 /* Mark all the variables in the virtual operands of all statements in | |
2033 SEQ for renaming. */ | |
2034 | |
2035 static void | |
2036 mark_all_v_defs_seq (gimple_seq seq) | |
2037 { | |
2038 gimple_stmt_iterator gsi; | |
2039 | |
2040 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2041 mark_all_v_defs_stmt (gsi_stmt (gsi)); | |
2042 } | |
2043 | |
2044 /* Mark every replacement under ELT with TREE_NO_WARNING. */ | |
2045 | |
2046 static void | |
2047 mark_no_warning (struct sra_elt *elt) | |
2048 { | |
2049 if (!elt->all_no_warning) | |
2050 { | |
2051 if (elt->replacement) | |
2052 TREE_NO_WARNING (elt->replacement) = 1; | |
2053 else | |
2054 { | |
2055 struct sra_elt *c; | |
2056 FOR_EACH_ACTUAL_CHILD (c, elt) | |
2057 mark_no_warning (c); | |
2058 } | |
2059 elt->all_no_warning = true; | |
2060 } | |
2061 } | |
2062 | |
2063 /* Build a single level component reference to ELT rooted at BASE. */ | |
2064 | |
2065 static tree | |
2066 generate_one_element_ref (struct sra_elt *elt, tree base) | |
2067 { | |
2068 switch (TREE_CODE (TREE_TYPE (base))) | |
2069 { | |
2070 case RECORD_TYPE: | |
2071 { | |
2072 tree field = elt->element; | |
2073 | |
2074 /* We can't test elt->in_bitfld_block here because, when this is | |
2075 called from instantiate_element, we haven't set this field | |
2076 yet. */ | |
2077 if (TREE_CODE (field) == BIT_FIELD_REF) | |
2078 { | |
2079 tree ret = unshare_expr (field); | |
2080 TREE_OPERAND (ret, 0) = base; | |
2081 return ret; | |
2082 } | |
2083 | |
2084 /* Watch out for compatible records with differing field lists. */ | |
2085 if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base))) | |
2086 field = find_compatible_field (TREE_TYPE (base), field); | |
2087 | |
2088 return build3 (COMPONENT_REF, elt->type, base, field, NULL); | |
2089 } | |
2090 | |
2091 case ARRAY_TYPE: | |
2092 if (TREE_CODE (elt->element) == RANGE_EXPR) | |
2093 return build4 (ARRAY_RANGE_REF, elt->type, base, | |
2094 TREE_OPERAND (elt->element, 0), NULL, NULL); | |
2095 else | |
2096 return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); | |
2097 | |
2098 case COMPLEX_TYPE: | |
2099 if (elt->element == integer_zero_node) | |
2100 return build1 (REALPART_EXPR, elt->type, base); | |
2101 else | |
2102 return build1 (IMAGPART_EXPR, elt->type, base); | |
2103 | |
2104 default: | |
2105 gcc_unreachable (); | |
2106 } | |
2107 } | |
2108 | |
2109 /* Build a full component reference to ELT rooted at its native variable. */ | |
2110 | |
2111 static tree | |
2112 generate_element_ref (struct sra_elt *elt) | |
2113 { | |
2114 if (elt->parent) | |
2115 return generate_one_element_ref (elt, generate_element_ref (elt->parent)); | |
2116 else | |
2117 return elt->element; | |
2118 } | |
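/* E.g. (illustrative names): for an element standing for field f of
   member s of variable v, the recursion above rebuilds the full
   reference v.s.f, starting from the root variable and wrapping one
   component reference per level.  */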
2119 | |
2120 /* Return true if BF is a bit-field that we can handle like a scalar. */ | |
2121 | |
2122 static bool | |
2123 scalar_bitfield_p (tree bf) | |
2124 { | |
2125 return (TREE_CODE (bf) == BIT_FIELD_REF | |
2126 && (is_gimple_reg (TREE_OPERAND (bf, 0)) | |
2127 || (TYPE_MODE (TREE_TYPE (TREE_OPERAND (bf, 0))) != BLKmode | |
2128 && (!TREE_SIDE_EFFECTS (TREE_OPERAND (bf, 0)) | |
2129 || (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE | |
2130 (TREE_OPERAND (bf, 0)))) | |
2131 <= BITS_PER_WORD))))); | |
2132 } | |
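/* E.g. (illustrative) BIT_FIELD_REF <x, 8, 16> qualifies when x is a
   gimple register, or when x has a non-BLKmode type and either has no
   side effects or is no wider than a word.  */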
2133 | |
2134 /* Create a statement sequence that assigns SRC to DST. */ | |
2135 | |
2136 static gimple_seq | |
2137 sra_build_assignment (tree dst, tree src) | |
2138 { | |
2139 gimple stmt; | |
2140 gimple_seq seq = NULL, seq2 = NULL; | |
2141 /* Turning BIT_FIELD_REFs into bit operations enables other passes | |
2142 to do a much better job at optimizing the code. | |
2143 From dst = BIT_FIELD_REF <var, sz, off> we produce | |
2144 | |
2145 SR.1 = (scalar type) var; | |
2146 SR.2 = SR.1 >> off; | |
2147 SR.3 = SR.2 & ((1 << sz) - 1); | |
2148 ... possible sign extension of SR.3 ... | |
2149 dst = (destination type) SR.3; | |
2150 */ | |
2151 if (scalar_bitfield_p (src)) | |
2152 { | |
2153 tree var, shift, width; | |
2154 tree utype, stype; | |
2155 bool unsignedp = (INTEGRAL_TYPE_P (TREE_TYPE (src)) | |
2156 ? TYPE_UNSIGNED (TREE_TYPE (src)) : true); | |
2157 struct gimplify_ctx gctx; | |
2158 | |
2159 var = TREE_OPERAND (src, 0); | |
2160 width = TREE_OPERAND (src, 1); | |
2161 /* The offset needs to be adjusted to a right shift quantity | |
2162 depending on the endianness. */ | |
2163 if (BYTES_BIG_ENDIAN) | |
2164 { | |
2165 tree tmp = size_binop (PLUS_EXPR, width, TREE_OPERAND (src, 2)); | |
2166 shift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), tmp); | |
2167 } | |
2168 else | |
2169 shift = TREE_OPERAND (src, 2); | |
2170 | |
2171 /* In weird cases we have non-integral types for the source or | |
2172 destination object. | |
2173 ??? For unknown reasons we also want an unsigned scalar type. */ | |
2174 stype = TREE_TYPE (var); | |
2175 if (!INTEGRAL_TYPE_P (stype)) | |
2176 stype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW | |
2177 (TYPE_SIZE (stype)), 1); | |
2178 else if (!TYPE_UNSIGNED (stype)) | |
2179 stype = unsigned_type_for (stype); | |
2180 | |
2181 utype = TREE_TYPE (dst); | |
2182 if (!INTEGRAL_TYPE_P (utype)) | |
2183 utype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW | |
2184 (TYPE_SIZE (utype)), 1); | |
2185 else if (!TYPE_UNSIGNED (utype)) | |
2186 utype = unsigned_type_for (utype); | |
2187 | |
2188 /* Convert the base var of the BIT_FIELD_REF to the scalar type | |
2189 we use for computation if we cannot use it directly. */ | |
2190 if (INTEGRAL_TYPE_P (TREE_TYPE (var))) | |
2191 var = fold_convert (stype, var); | |
2192 else | |
2193 var = fold_build1 (VIEW_CONVERT_EXPR, stype, var); | |
2194 | |
2195 if (!integer_zerop (shift)) | |
2196 var = fold_build2 (RSHIFT_EXPR, stype, var, shift); | |
2197 | |
2198 /* If we need a masking operation, produce one. */ | |
2199 if (TREE_INT_CST_LOW (width) == TYPE_PRECISION (stype)) | |
2200 unsignedp = true; | |
2201 else | |
2202 { | |
2203 tree one = build_int_cst_wide (stype, 1, 0); | |
2204 tree mask = int_const_binop (LSHIFT_EXPR, one, width, 0); | |
2205 mask = int_const_binop (MINUS_EXPR, mask, one, 0); | |
2206 var = fold_build2 (BIT_AND_EXPR, stype, var, mask); | |
2207 } | |
2208 | |
2209 /* After shifting and masking, convert to the target type. */ | |
2210 var = fold_convert (utype, var); | |
2211 | |
2212 /* Perform sign extension, if required. | |
2213 ??? This should never be necessary. */ | |
2214 if (!unsignedp) | |
2215 { | |
2216 tree signbit = int_const_binop (LSHIFT_EXPR, | |
2217 build_int_cst_wide (utype, 1, 0), | |
2218 size_binop (MINUS_EXPR, width, | |
2219 bitsize_int (1)), 0); | |
2220 | |
2221 var = fold_build2 (BIT_XOR_EXPR, utype, var, signbit); | |
2222 var = fold_build2 (MINUS_EXPR, utype, var, signbit); | |
2223 } | |
2224 | |
2225 /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */ | |
2226 STRIP_NOPS (dst); | |
2227 | |
2228 /* Finally, move and convert to the destination. */ | |
2229 if (INTEGRAL_TYPE_P (TREE_TYPE (dst))) | |
2230 var = fold_convert (TREE_TYPE (dst), var); | |
2231 else | |
2232 var = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (dst), var); | |
2233 | |
2234 push_gimplify_context (&gctx); | |
2235 gctx.into_ssa = true; | |
2236 gctx.allow_rhs_cond_expr = true; | |
2237 | |
2238 gimplify_assign (dst, var, &seq); | |
2239 | |
2240 if (gimple_referenced_vars (cfun)) | |
2241 for (var = gctx.temps; var; var = TREE_CHAIN (var)) | |
2242 add_referenced_var (var); | |
2243 pop_gimplify_context (NULL); | |
2244 | |
2245 return seq; | |
2246 } | |
2247 | |
2248 /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */ | |
2249 if (CONVERT_EXPR_P (dst)) | |
2250 { | |
2251 STRIP_NOPS (dst); | |
2252 src = fold_convert (TREE_TYPE (dst), src); | |
2253 } | |
2254 /* It was hoped that we could perform some type sanity checking | |
2255 here, but since front-ends can emit accesses of fields in types | |
2256 different from their nominal types and copy structures containing | |
2257 them as a whole, we'd have to handle such differences here. | |
2258 Since such accesses under different types require compatibility | |
2259 anyway, there's little point in making tests and/or adding | |
2260 conversions to ensure the types of src and dst are the same. | |
2261 So we just assume type differences at this point are ok. | |
2262 The only exception we make here are pointer types, which can be different | |
2263 in e.g. structurally equal, but non-identical RECORD_TYPEs. */ | |
2264 else if (POINTER_TYPE_P (TREE_TYPE (dst)) | |
2265 && !useless_type_conversion_p (TREE_TYPE (dst), TREE_TYPE (src))) | |
2266 src = fold_convert (TREE_TYPE (dst), src); | |
2267 | |
2268 /* ??? Only call the gimplifier if we need to. Otherwise we may | |
2269 end up substituting with DECL_VALUE_EXPR - see PR37380. */ | |
2270 if (!handled_component_p (src) | |
2271 && !SSA_VAR_P (src)) | |
2272 { | |
2273 src = force_gimple_operand (src, &seq2, false, NULL_TREE); | |
2274 gimple_seq_add_seq (&seq, seq2); | |
2275 } | |
2276 stmt = gimple_build_assign (dst, src); | |
2277 gimple_seq_add_stmt (&seq, stmt); | |
2278 return seq; | |
2279 } | |
2280 | |
2281 /* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment() | |
2282 takes care of assignments, but we must create copies for uses. */ | |
2283 #define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : unshare_expr (t)) | |
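/* For instance, if the replacement for an element is
   BIT_FIELD_REF <SR.5, 8, 0> (names illustrative), a use site gets an
   unshared copy from REPLDUP so that later rewriting of one
   occurrence cannot corrupt another, while plain DECL replacements
   are returned as-is.  */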
2284 | |
2285 /* Emit an assignment from SRC to DST, but if DST is a scalarizable | |
2286 BIT_FIELD_REF, turn it into bit operations. */ | |
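/* Sketch of the expansion done below (SR.N names are illustrative;
   conversions and big-endian adjustments omitted): for
   BIT_FIELD_REF <var, sz, off> = src we produce approximately

     SR.1 = var & ~MASK;   MASK has sz one-bits starting at off
     SR.2 = (unsigned type) src;
     SR.3 = SR.2 << off;
     var  = SR.1 | SR.3;
*/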
2287 | |
2288 static gimple_seq | |
2289 sra_build_bf_assignment (tree dst, tree src) | |
2290 { | |
2291 tree var, type, utype, tmp, tmp2, tmp3; | |
2292 gimple_seq seq; | |
2293 gimple stmt; | |
2294 tree cst, cst2, mask; | |
2295 tree minshift, maxshift; | |
2296 | |
2297 if (TREE_CODE (dst) != BIT_FIELD_REF) | |
2298 return sra_build_assignment (dst, src); | |
2299 | |
2300 var = TREE_OPERAND (dst, 0); | |
2301 | |
2302 if (!scalar_bitfield_p (dst)) | |
2303 return sra_build_assignment (REPLDUP (dst), src); | |
2304 | |
2305 seq = NULL; | |
2306 | |
2307 cst = fold_convert (bitsizetype, TREE_OPERAND (dst, 2)); | |
2308 cst2 = size_binop (PLUS_EXPR, | |
2309 fold_convert (bitsizetype, TREE_OPERAND (dst, 1)), | |
2310 cst); | |
2311 | |
2312 if (BYTES_BIG_ENDIAN) | |
2313 { | |
2314 maxshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst); | |
2315 minshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst2); | |
2316 } | |
2317 else | |
2318 { | |
2319 maxshift = cst2; | |
2320 minshift = cst; | |
2321 } | |
2322 | |
2323 type = TREE_TYPE (var); | |
2324 if (!INTEGRAL_TYPE_P (type)) | |
2325 type = lang_hooks.types.type_for_size | |
2326 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (var))), 1); | |
2327 if (TYPE_UNSIGNED (type)) | |
2328 utype = type; | |
2329 else | |
2330 utype = unsigned_type_for (type); | |
2331 | |
2332 mask = build_int_cst_wide (utype, 1, 0); | |
2333 if (TREE_INT_CST_LOW (maxshift) == TYPE_PRECISION (utype)) | |
2334 cst = build_int_cst_wide (utype, 0, 0); | |
2335 else | |
2336 cst = int_const_binop (LSHIFT_EXPR, mask, maxshift, true); | |
2337 if (integer_zerop (minshift)) | |
2338 cst2 = mask; | |
2339 else | |
2340 cst2 = int_const_binop (LSHIFT_EXPR, mask, minshift, true); | |
2341 mask = int_const_binop (MINUS_EXPR, cst, cst2, true); | |
2342 mask = fold_build1 (BIT_NOT_EXPR, utype, mask); | |
2343 | |
2344 if (TYPE_MAIN_VARIANT (utype) != TYPE_MAIN_VARIANT (TREE_TYPE (var)) | |
2345 && !integer_zerop (mask)) | |
2346 { | |
2347 tmp = var; | |
2348 if (!is_gimple_variable (tmp)) | |
2349 tmp = unshare_expr (var); | |
2350 else | |
2351 TREE_NO_WARNING (var) = true; | |
2352 | |
2353 tmp2 = make_rename_temp (utype, "SR"); | |
2354 | |
2355 if (INTEGRAL_TYPE_P (TREE_TYPE (var))) | |
2356 tmp = fold_convert (utype, tmp); | |
2357 else | |
2358 tmp = fold_build1 (VIEW_CONVERT_EXPR, utype, tmp); | |
2359 | |
2360 stmt = gimple_build_assign (tmp2, tmp); | |
2361 gimple_seq_add_stmt (&seq, stmt); | |
2362 } | |
2363 else | |
2364 tmp2 = var; | |
2365 | |
2366 if (!integer_zerop (mask)) | |
2367 { | |
2368 tmp = make_rename_temp (utype, "SR"); | |
2369 stmt = gimple_build_assign (tmp, fold_build2 (BIT_AND_EXPR, utype, | |
2370 tmp2, mask)); | |
2371 gimple_seq_add_stmt (&seq, stmt); | |
2372 } | |
2373 else | |
2374 tmp = mask; | |
2375 | |
2376 if (is_gimple_reg (src) && INTEGRAL_TYPE_P (TREE_TYPE (src))) | |
2377 tmp2 = src; | |
2378 else if (INTEGRAL_TYPE_P (TREE_TYPE (src))) | |
2379 { | |
2380 gimple_seq tmp_seq; | |
2381 tmp2 = make_rename_temp (TREE_TYPE (src), "SR"); | |
2382 tmp_seq = sra_build_assignment (tmp2, src); | |
2383 gimple_seq_add_seq (&seq, tmp_seq); | |
2384 } | |
2385 else | |
2386 { | |
2387 gimple_seq tmp_seq; | |
2388 tmp2 = make_rename_temp | |
2389 (lang_hooks.types.type_for_size | |
2390 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (src))), | |
2391 1), "SR"); | |
2392 tmp_seq = sra_build_assignment (tmp2, fold_build1 (VIEW_CONVERT_EXPR, | |
2393 TREE_TYPE (tmp2), src)); | |
2394 gimple_seq_add_seq (&seq, tmp_seq); | |
2395 } | |
2396 | |
2397 if (!TYPE_UNSIGNED (TREE_TYPE (tmp2))) | |
2398 { | |
2399 gimple_seq tmp_seq; | |
2400 tree ut = unsigned_type_for (TREE_TYPE (tmp2)); | |
2401 tmp3 = make_rename_temp (ut, "SR"); | |
2402 tmp2 = fold_convert (ut, tmp2); | |
2403 tmp_seq = sra_build_assignment (tmp3, tmp2); | |
2404 gimple_seq_add_seq (&seq, tmp_seq); | |
2405 | |
2406 tmp2 = fold_build1 (BIT_NOT_EXPR, utype, mask); | |
2407 tmp2 = int_const_binop (RSHIFT_EXPR, tmp2, minshift, true); | |
2408 tmp2 = fold_convert (ut, tmp2); | |
2409 tmp2 = fold_build2 (BIT_AND_EXPR, ut, tmp3, tmp2); | |
2410 | |
2411 if (tmp3 != tmp2) | |
2412 { | |
2413 tmp3 = make_rename_temp (ut, "SR"); | |
2414 tmp_seq = sra_build_assignment (tmp3, tmp2); | |
2415 gimple_seq_add_seq (&seq, tmp_seq); | |
2416 } | |
2417 | |
2418 tmp2 = tmp3; | |
2419 } | |
2420 | |
2421 if (TYPE_MAIN_VARIANT (TREE_TYPE (tmp2)) != TYPE_MAIN_VARIANT (utype)) | |
2422 { | |
2423 gimple_seq tmp_seq; | |
2424 tmp3 = make_rename_temp (utype, "SR"); | |
2425 tmp2 = fold_convert (utype, tmp2); | |
2426 tmp_seq = sra_build_assignment (tmp3, tmp2); | |
2427 gimple_seq_add_seq (&seq, tmp_seq); | |
2428 tmp2 = tmp3; | |
2429 } | |
2430 | |
2431 if (!integer_zerop (minshift)) | |
2432 { | |
2433 tmp3 = make_rename_temp (utype, "SR"); | |
2434 stmt = gimple_build_assign (tmp3, fold_build2 (LSHIFT_EXPR, utype, | |
2435 tmp2, minshift)); | |
2436 gimple_seq_add_stmt (&seq, stmt); | |
2437 tmp2 = tmp3; | |
2438 } | |
2439 | |
2440 if (utype != TREE_TYPE (var)) | |
2441 tmp3 = make_rename_temp (utype, "SR"); | |
2442 else | |
2443 tmp3 = var; | |
2444 stmt = gimple_build_assign (tmp3, fold_build2 (BIT_IOR_EXPR, utype, | |
2445 tmp, tmp2)); | |
2446 gimple_seq_add_stmt (&seq, stmt); | |
2447 | |
2448 if (tmp3 != var) | |
2449 { | |
2450 if (TREE_TYPE (var) == type) | |
2451 stmt = gimple_build_assign (var, fold_convert (type, tmp3)); | |
2452 else | |
2453 stmt = gimple_build_assign (var, fold_build1 (VIEW_CONVERT_EXPR, | |
2454 TREE_TYPE (var), tmp3)); | |
2455 gimple_seq_add_stmt (&seq, stmt); | |
2456 } | |
2457 | |
2458 return seq; | |
2459 } | |
2460 | |
2461 /* Expand an assignment of SRC to the scalarized representation of | |
2462 ELT. If it is a field group, try to widen the assignment to cover | |
2463 the full variable. */ | |
2464 | |
2465 static gimple_seq | |
2466 sra_build_elt_assignment (struct sra_elt *elt, tree src) | |
2467 { | |
2468 tree dst = elt->replacement; | |
2469 tree var, tmp, cst, cst2; | |
2470 gimple stmt; | |
2471 gimple_seq seq; | |
2472 | |
2473 if (TREE_CODE (dst) != BIT_FIELD_REF | |
2474 || !elt->in_bitfld_block) | |
2475 return sra_build_assignment (REPLDUP (dst), src); | |
2476 | |
2477 var = TREE_OPERAND (dst, 0); | |
2478 | |
2479 /* Try to widen the assignment to the entire variable. | |
2480 We need the source to be a BIT_FIELD_REF as well, such that, for | |
2481 BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>, | |
2482 by design, conditions are met such that we can turn it into | |
2483 d = BIT_FIELD_REF<s,dw,sp-dp>. */ | |
2484 if (elt->in_bitfld_block == 2 | |
2485 && TREE_CODE (src) == BIT_FIELD_REF) | |
2486 { | |
2487 tmp = src; | |
2488 cst = TYPE_SIZE (TREE_TYPE (var)); | |
2489 cst2 = size_binop (MINUS_EXPR, TREE_OPERAND (src, 2), | |
2490 TREE_OPERAND (dst, 2)); | |
2491 | |
2492 src = TREE_OPERAND (src, 0); | |
2493 | |
2494 /* Avoid full-width bit-fields. */ | |
2495 if (integer_zerop (cst2) | |
2496 && tree_int_cst_equal (cst, TYPE_SIZE (TREE_TYPE (src)))) | |
2497 { | |
2498 if (INTEGRAL_TYPE_P (TREE_TYPE (src)) | |
2499 && !TYPE_UNSIGNED (TREE_TYPE (src))) | |
2500 src = fold_convert (unsigned_type_for (TREE_TYPE (src)), src); | |
2501 | |
2502 /* If a single conversion won't do, we'll need a statement | |
2503 sequence. */ | |
2504 if (TYPE_MAIN_VARIANT (TREE_TYPE (var)) | |
2505 != TYPE_MAIN_VARIANT (TREE_TYPE (src))) | |
2506 { | |
2507 gimple_seq tmp_seq; | |
2508 seq = NULL; | |
2509 | |
2510 if (!INTEGRAL_TYPE_P (TREE_TYPE (src))) | |
2511 src = fold_build1 (VIEW_CONVERT_EXPR, | |
2512 lang_hooks.types.type_for_size | |
2513 (TREE_INT_CST_LOW | |
2514 (TYPE_SIZE (TREE_TYPE (src))), | |
2515 1), src); | |
2516 gcc_assert (TYPE_UNSIGNED (TREE_TYPE (src))); | |
2517 | |
2518 tmp = make_rename_temp (TREE_TYPE (src), "SR"); | |
2519 stmt = gimple_build_assign (tmp, src); | |
2520 gimple_seq_add_stmt (&seq, stmt); | |
2521 | |
2522 tmp_seq = sra_build_assignment (var, | |
2523 fold_convert (TREE_TYPE (var), | |
2524 tmp)); | |
2525 gimple_seq_add_seq (&seq, tmp_seq); | |
2526 | |
2527 return seq; | |
2528 } | |
2529 | |
2530 src = fold_convert (TREE_TYPE (var), src); | |
2531 } | |
2532 else | |
2533 { | |
2534 src = fold_convert (TREE_TYPE (var), tmp); | |
2535 } | |
2536 | |
2537 return sra_build_assignment (var, src); | |
2538 } | |
2539 | |
2540 return sra_build_bf_assignment (dst, src); | |
2541 } | |
2542 | |
2543 /* Generate a set of assignment statements in *SEQ_P to copy all | |
2544 instantiated elements under ELT to or from the equivalent structure | |
2545 rooted at EXPR. COPY_OUT controls the direction of the copy, with | |
2546 true meaning to copy out of EXPR into ELT. */ | |
2547 | |
2548 static void | |
2549 generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, | |
2550 gimple_seq *seq_p) | |
2551 { | |
2552 struct sra_elt *c; | |
2553 gimple_seq tmp_seq; | |
2554 tree t; | |
2555 | |
2556 if (!copy_out && TREE_CODE (expr) == SSA_NAME | |
2557 && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE) | |
2558 { | |
2559 tree r, i; | |
2560 | |
2561 c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT); | |
2562 r = c->replacement; | |
2563 c = lookup_element (elt, integer_one_node, NULL, NO_INSERT); | |
2564 i = c->replacement; | |
2565 | |
2566 t = build2 (COMPLEX_EXPR, elt->type, r, i); | |
2567 tmp_seq = sra_build_bf_assignment (expr, t); | |
2568 SSA_NAME_DEF_STMT (expr) = gimple_seq_last_stmt (tmp_seq); | |
2569 gimple_seq_add_seq (seq_p, tmp_seq); | |
2570 } | |
2571 else if (elt->replacement) | |
2572 { | |
2573 if (copy_out) | |
2574 tmp_seq = sra_build_elt_assignment (elt, expr); | |
2575 else | |
2576 tmp_seq = sra_build_bf_assignment (expr, REPLDUP (elt->replacement)); | |
2577 gimple_seq_add_seq (seq_p, tmp_seq); | |
2578 } | |
2579 else | |
2580 { | |
2581 FOR_EACH_ACTUAL_CHILD (c, elt) | |
2582 { | |
2583 t = generate_one_element_ref (c, unshare_expr (expr)); | |
2584 generate_copy_inout (c, copy_out, t, seq_p); | |
2585 } | |
2586 } | |
2587 } | |
2588 | |
2589 /* Generate a set of assignment statements in *SEQ_P to copy all instantiated | |
2590 elements under SRC to their counterparts under DST. There must be a 1-1 | |
2591 correspondence of instantiated elements. */ | |
2592 | |
2593 static void | |
2594 generate_element_copy (struct sra_elt *dst, struct sra_elt *src, gimple_seq *seq_p) | |
2595 { | |
2596 struct sra_elt *dc, *sc; | |
2597 | |
2598 FOR_EACH_ACTUAL_CHILD (dc, dst) | |
2599 { | |
2600 sc = lookup_element (src, dc->element, NULL, NO_INSERT); | |
2601 if (!sc && dc->in_bitfld_block == 2) | |
2602 { | |
2603 struct sra_elt *dcs; | |
2604 | |
2605 FOR_EACH_ACTUAL_CHILD (dcs, dc) | |
2606 { | |
2607 sc = lookup_element (src, dcs->element, NULL, NO_INSERT); | |
2608 gcc_assert (sc); | |
2609 generate_element_copy (dcs, sc, seq_p); | |
2610 } | |
2611 | |
2612 continue; | |
2613 } | |
2614 | |
2615 /* If DST and SRC are structs with the same elements, but do not have | |
2616 the same TYPE_MAIN_VARIANT, then lookup of DST FIELD_DECL in SRC | |
2617 will fail. Try harder by finding the corresponding FIELD_DECL | |
2618 in SRC. */ | |
2619 if (!sc) | |
2620 { | |
2621 tree f; | |
2622 | |
2623 gcc_assert (useless_type_conversion_p (dst->type, src->type)); | |
2624 gcc_assert (TREE_CODE (dc->element) == FIELD_DECL); | |
2625 for (f = TYPE_FIELDS (src->type); f ; f = TREE_CHAIN (f)) | |
2626 if (simple_cst_equal (DECL_FIELD_OFFSET (f), | |
2627 DECL_FIELD_OFFSET (dc->element)) > 0 | |
2628 && simple_cst_equal (DECL_FIELD_BIT_OFFSET (f), | |
2629 DECL_FIELD_BIT_OFFSET (dc->element)) > 0 | |
2630 && simple_cst_equal (DECL_SIZE (f), | |
2631 DECL_SIZE (dc->element)) > 0 | |
2632 && (useless_type_conversion_p (TREE_TYPE (dc->element), | |
2633 TREE_TYPE (f)) | |
2634 || (POINTER_TYPE_P (TREE_TYPE (dc->element)) | |
2635 && POINTER_TYPE_P (TREE_TYPE (f))))) | |
2636 break; | |
2637 gcc_assert (f != NULL_TREE); | |
2638 sc = lookup_element (src, f, NULL, NO_INSERT); | |
2639 } | |
2640 | |
2641 generate_element_copy (dc, sc, seq_p); | |
2642 } | |
2643 | |
2644 if (dst->replacement) | |
2645 { | |
2646 gimple_seq tmp_seq; | |
2647 | |
2648 gcc_assert (src->replacement); | |
2649 | |
2650 tmp_seq = sra_build_elt_assignment (dst, REPLDUP (src->replacement)); | |
2651 gimple_seq_add_seq (seq_p, tmp_seq); | |
2652 } | |
2653 } | |
2654 | |
2655 /* Generate a set of assignment statements in *SEQ_P to zero all instantiated | |
2656 elements under ELT. In addition, do not assign to elements that have been | |
2657 marked VISITED but do reset the visited flag; this allows easy coordination | |
2658 with generate_element_init. */ | |
2659 | |
2660 static void | |
2661 generate_element_zero (struct sra_elt *elt, gimple_seq *seq_p) | |
2662 { | |
2663 struct sra_elt *c; | |
2664 | |
2665 if (elt->visited) | |
2666 { | |
2667 elt->visited = false; | |
2668 return; | |
2669 } | |
2670 | |
2671 if (!elt->in_bitfld_block) | |
2672 FOR_EACH_ACTUAL_CHILD (c, elt) | |
2673 generate_element_zero (c, seq_p); | |
2674 | |
2675 if (elt->replacement) | |
2676 { | |
2677 tree t; | |
2678 gimple_seq tmp_seq; | |
2679 | |
2680 gcc_assert (elt->is_scalar); | |
2681 t = fold_convert (elt->type, integer_zero_node); | |
2682 | |
2683 tmp_seq = sra_build_elt_assignment (elt, t); | |
2684 gimple_seq_add_seq (seq_p, tmp_seq); | |
2685 } | |
2686 } | |
2687 | |
2688 /* Generate an assignment of INIT to ELT's replacement, where INIT may | |
2689 need gimplification. Add the result to *SEQ_P. */ | |
2690 | |
2691 static void | |
2692 generate_one_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) | |
2693 { | |
2694 gimple_seq tmp_seq = sra_build_elt_assignment (elt, init); | |
2695 gimple_seq_add_seq (seq_p, tmp_seq); | |
2696 } | |
2697 | |
2698 /* Generate a set of assignment statements in *SEQ_P to set all instantiated | |
2699 elements under ELT with the contents of the initializer INIT. In addition, | |
2700 mark all assigned elements VISITED; this allows easy coordination with | |
2701 generate_element_zero. Return false if we found a case we couldn't | |
2702 handle. */ | |
2703 | |
2704 static bool | |
2705 generate_element_init_1 (struct sra_elt *elt, tree init, gimple_seq *seq_p) | |
2706 { | |
2707 bool result = true; | |
2708 enum tree_code init_code; | |
2709 struct sra_elt *sub; | |
2710 tree t; | |
2711 unsigned HOST_WIDE_INT idx; | |
2712 tree value, purpose; | |
2713 | |
2714 /* We can be passed DECL_INITIAL of a static variable. It might have a | |
2715 conversion, which we strip off here. */ | |
2716 STRIP_USELESS_TYPE_CONVERSION (init); | |
2717 init_code = TREE_CODE (init); | |
2718 | |
2719 if (elt->is_scalar) | |
2720 { | |
2721 if (elt->replacement) | |
2722 { | |
2723 generate_one_element_init (elt, init, seq_p); | |
2724 elt->visited = true; | |
2725 } | |
2726 return result; | |
2727 } | |
2728 | |
2729 switch (init_code) | |
2730 { | |
2731 case COMPLEX_CST: | |
2732 case COMPLEX_EXPR: | |
2733 FOR_EACH_ACTUAL_CHILD (sub, elt) | |
2734 { | |
2735 if (sub->element == integer_zero_node) | |
2736 t = (init_code == COMPLEX_EXPR | |
2737 ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); | |
2738 else | |
2739 t = (init_code == COMPLEX_EXPR | |
2740 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); | |
2741 result &= generate_element_init_1 (sub, t, seq_p); | |
2742 } | |
2743 break; | |
2744 | |
2745 case CONSTRUCTOR: | |
2746 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value) | |
2747 { | |
2748 /* Array constructors are routinely created with NULL indices. */ | |
2749 if (purpose == NULL_TREE) | |
2750 { | |
2751 result = false; | |
2752 break; | |
2753 } | |
2754 if (TREE_CODE (purpose) == RANGE_EXPR) | |
2755 { | |
2756 tree lower = TREE_OPERAND (purpose, 0); | |
2757 tree upper = TREE_OPERAND (purpose, 1); | |
2758 | |
2759 while (1) | |
2760 { | |
2761 sub = lookup_element (elt, lower, NULL, NO_INSERT); | |
2762 if (sub != NULL) | |
2763 result &= generate_element_init_1 (sub, value, seq_p); | |
2764 if (tree_int_cst_equal (lower, upper)) | |
2765 break; | |
2766 lower = int_const_binop (PLUS_EXPR, lower, | |
2767 integer_one_node, true); | |
2768 } | |
2769 } | |
2770 else | |
2771 { | |
2772 sub = lookup_element (elt, purpose, NULL, NO_INSERT); | |
2773 if (sub != NULL) | |
2774 result &= generate_element_init_1 (sub, value, seq_p); | |
2775 } | |
2776 } | |
2777 break; | |
2778 | |
2779 default: | |
2780 elt->visited = true; | |
2781 result = false; | |
2782 } | |
2783 | |
2784 return result; | |
2785 } | |
2786 | |
2787 /* A wrapper function for generate_element_init_1 that handles cleanup after | |
2788 gimplification. */ | |
2789 | |
2790 static bool | |
2791 generate_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) | |
2792 { | |
2793 bool ret; | |
2794 struct gimplify_ctx gctx; | |
2795 | |
2796 push_gimplify_context (&gctx); | |
2797 ret = generate_element_init_1 (elt, init, seq_p); | |
2798 pop_gimplify_context (NULL); | |
2799 | |
2800 /* The replacement can expose previously unreferenced variables. */ | |
2801 if (ret && *seq_p) | |
2802 { | |
2803 gimple_stmt_iterator i; | |
2804 | |
2805 for (i = gsi_start (*seq_p); !gsi_end_p (i); gsi_next (&i)) | |
2806 find_new_referenced_vars (gsi_stmt (i)); | |
2807 } | |
2808 | |
2809 return ret; | |
2810 } | |
2811 | |
2812 /* Insert a gimple_seq SEQ on all the outgoing edges of BB. Note that | |
2813 if BB has more than one such edge, SEQ will be replicated for each | |
2814 edge. Abnormal edges are ignored. */ | |
2815 | |
2816 void | |
2817 insert_edge_copies_seq (gimple_seq seq, basic_block bb) | |
2818 { | |
2819 edge e; | |
2820 edge_iterator ei; | |
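/* Starting at -1 makes N_COPIES equal to the number of interesting
   edges minus one after the counting loop below, so the final edge
   can receive SEQ itself while every earlier edge gets a copy.  */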
2821 unsigned n_copies = -1; | |
2822 | |
2823 FOR_EACH_EDGE (e, ei, bb->succs) | |
2824 if (!(e->flags & EDGE_ABNORMAL)) | |
2825 n_copies++; | |
2826 | |
2827 FOR_EACH_EDGE (e, ei, bb->succs) | |
2828 if (!(e->flags & EDGE_ABNORMAL)) | |
2829 gsi_insert_seq_on_edge (e, n_copies-- > 0 ? gimple_seq_copy (seq) : seq); | |
2830 } | |
2831 | |
2832 /* Helper function to insert SEQ before GSI, and set up line number info. */ | |
2833 | |
2834 void | |
2835 sra_insert_before (gimple_stmt_iterator *gsi, gimple_seq seq) | |
2836 { | |
2837 gimple stmt = gsi_stmt (*gsi); | |
2838 | |
2839 if (gimple_has_location (stmt)) | |
2840 annotate_all_with_location (seq, gimple_location (stmt)); | |
2841 gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); | |
2842 } | |
2843 | |
2844 /* Similarly, but insert after GSI. Handles insertion onto edges as well. */ | |
2845 | |
2846 void | |
2847 sra_insert_after (gimple_stmt_iterator *gsi, gimple_seq seq) | |
2848 { | |
2849 gimple stmt = gsi_stmt (*gsi); | |
2850 | |
2851 if (gimple_has_location (stmt)) | |
2852 annotate_all_with_location (seq, gimple_location (stmt)); | |
2853 | |
2854 if (stmt_ends_bb_p (stmt)) | |
2855 insert_edge_copies_seq (seq, gsi_bb (*gsi)); | |
2856 else | |
2857 gsi_insert_seq_after (gsi, seq, GSI_SAME_STMT); | |
2858 } | |
2859 | |
2860 /* Similarly, but replace the statement at GSI. */ | |
2861 | |
2862 static void | |
2863 sra_replace (gimple_stmt_iterator *gsi, gimple_seq seq) | |
2864 { | |
2865 sra_insert_before (gsi, seq); | |
2866 gsi_remove (gsi, false); | |
2867 if (gsi_end_p (*gsi)) | |
2868 *gsi = gsi_last (gsi_seq (*gsi)); | |
2869 else | |
2870 gsi_prev (gsi); | |
2871 } | |
2872 | |
2873 /* Data structure that bitfield_overlaps_p fills in with information | |
2874 about the element passed in and how much of it overlaps with the | |
2875 bit-range passed to it. */ | |
2876 | |
2877 struct bitfield_overlap_info | |
2878 { | |
2879 /* The bit-length of an element. */ | |
2880 tree field_len; | |
2881 | |
2882 /* The bit-position of the element in its parent. */ | |
2883 tree field_pos; | |
2884 | |
2885 /* The number of bits of the element that overlap with the incoming | |
2886 bit range. */ | |
2887 tree overlap_len; | |
2888 | |
2889 /* The first bit of the element that overlaps with the incoming bit | |
2890 range. */ | |
2891 tree overlap_pos; | |
2892 }; | |
2893 | |
2894 /* Return true if a BIT_FIELD_REF<(FLD->parent), BLEN, BPOS> | |
2895 expression (referenced as BF below) accesses any of the bits in FLD, | |
2896 false if it doesn't. If DATA is non-null, its field_len and | |
2897 field_pos are filled in such that BIT_FIELD_REF<(FLD->parent), | |
2898 field_len, field_pos> (referenced as BFLD below) represents the | |
2899 entire field FLD->element, and BIT_FIELD_REF<BFLD, overlap_len, | |
2900 overlap_pos> represents the portion of the entire field that | |
2901 overlaps with BF. */ | |
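/* Worked example (illustrative numbers): for BLEN == 8, BPOS == 4 and
   a field with flen == 8, fpos == 0, the field's last four bits fall
   inside the range, so the function returns true with field_len == 8,
   field_pos == 0, overlap_len == 4 and overlap_pos == 4 (the overlap
   begins at bit 4 within the field).  */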
2902 | |
2903 static bool | |
2904 bitfield_overlaps_p (tree blen, tree bpos, struct sra_elt *fld, | |
2905 struct bitfield_overlap_info *data) | |
2906 { | |
2907 tree flen, fpos; | |
2908 bool ret; | |
2909 | |
2910 if (TREE_CODE (fld->element) == FIELD_DECL) | |
2911 { | |
2912 flen = fold_convert (bitsizetype, DECL_SIZE (fld->element)); | |
2913 fpos = fold_convert (bitsizetype, DECL_FIELD_OFFSET (fld->element)); | |
2914 fpos = size_binop (MULT_EXPR, fpos, bitsize_int (BITS_PER_UNIT)); | |
2915 fpos = size_binop (PLUS_EXPR, fpos, DECL_FIELD_BIT_OFFSET (fld->element)); | |
2916 } | |
2917 else if (TREE_CODE (fld->element) == BIT_FIELD_REF) | |
2918 { | |
2919 flen = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 1)); | |
2920 fpos = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 2)); | |
2921 } | |
2922 else if (TREE_CODE (fld->element) == INTEGER_CST) | |
2923 { | |
2924 tree domain_type = TYPE_DOMAIN (TREE_TYPE (fld->parent->element)); | |
2925 flen = fold_convert (bitsizetype, TYPE_SIZE (fld->type)); | |
2926 fpos = fold_convert (bitsizetype, fld->element); | |
2927 if (domain_type && TYPE_MIN_VALUE (domain_type)) | |
2928 fpos = size_binop (MINUS_EXPR, fpos, | |
2929 fold_convert (bitsizetype, | |
2930 TYPE_MIN_VALUE (domain_type))); | |
2931 fpos = size_binop (MULT_EXPR, flen, fpos); | |
2932 } | |
2933 else | |
2934 gcc_unreachable (); | |
2935 | |
2936 gcc_assert (host_integerp (blen, 1) | |
2937 && host_integerp (bpos, 1) | |
2938 && host_integerp (flen, 1) | |
2939 && host_integerp (fpos, 1)); | |
2940 | |
2941 ret = ((!tree_int_cst_lt (fpos, bpos) | |
2942 && tree_int_cst_lt (size_binop (MINUS_EXPR, fpos, bpos), | |
2943 blen)) | |
2944 || (!tree_int_cst_lt (bpos, fpos) | |
2945 && tree_int_cst_lt (size_binop (MINUS_EXPR, bpos, fpos), | |
2946 flen))); | |
2947 | |
2948 if (!ret) | |
2949 return ret; | |
2950 | |
2951 if (data) | |
2952 { | |
2953 tree bend, fend; | |
2954 | |
2955 data->field_len = flen; | |
2956 data->field_pos = fpos; | |
2957 | |
2958 fend = size_binop (PLUS_EXPR, fpos, flen); | |
2959 bend = size_binop (PLUS_EXPR, bpos, blen); | |
2960 | |
2961 if (tree_int_cst_lt (bend, fend)) | |
2962 data->overlap_len = size_binop (MINUS_EXPR, bend, fpos); | |
2963 else | |
2964 data->overlap_len = NULL; | |
2965 | |
2966 if (tree_int_cst_lt (fpos, bpos)) | |
2967 { | |
2968 data->overlap_pos = size_binop (MINUS_EXPR, bpos, fpos); | |
2969 data->overlap_len = size_binop (MINUS_EXPR, | |
2970 data->overlap_len | |
2971 ? data->overlap_len | |
2972 : data->field_len, | |
2973 data->overlap_pos); | |
2974 } | |
2975 else | |
2976 data->overlap_pos = NULL; | |
2977 } | |
2978 | |
2979 return ret; | |
2980 } | |
2981 | |
2982 /* Add to *SEQ_P a sequence of statements that copies BLEN bits between | |
2983 VAR and the scalarized elements of ELT, starting at bit VPOS of VAR | |
2984 and at bit BPOS of ELT. The direction of the copy is given by | |
2985 TO_VAR. */ | |
2986 | |
2987 static void | |
2988 sra_explode_bitfield_assignment (tree var, tree vpos, bool to_var, | |
2989 gimple_seq *seq_p, tree blen, tree bpos, | |
2990 struct sra_elt *elt) | |
2991 { | |
2992 struct sra_elt *fld; | |
2993 struct bitfield_overlap_info flp; | |
2994 | |
2995 FOR_EACH_ACTUAL_CHILD (fld, elt) | |
2996 { | |
2997 tree flen, fpos; | |
2998 | |
2999 if (!bitfield_overlaps_p (blen, bpos, fld, &flp)) | |
3000 continue; | |
3001 | |
3002 flen = flp.overlap_len ? flp.overlap_len : flp.field_len; | |
3003 fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0); | |
3004 | |
3005 if (fld->replacement) | |
3006 { | |
3007 tree infld, invar, type; | |
3008 gimple_seq st; | |
3009 | |
3010 infld = fld->replacement; | |
3011 | |
3012 type = unsigned_type_for (TREE_TYPE (infld)); | |
3013 if (TYPE_PRECISION (type) != TREE_INT_CST_LOW (flen)) | |
3014 type = build_nonstandard_integer_type (TREE_INT_CST_LOW (flen), 1); | |
3015 | |
3016 if (TREE_CODE (infld) == BIT_FIELD_REF) | |
3017 { | |
3018 fpos = size_binop (PLUS_EXPR, fpos, TREE_OPERAND (infld, 2)); | |
3019 infld = TREE_OPERAND (infld, 0); | |
3020 } | |
3021 else if (BYTES_BIG_ENDIAN && DECL_P (fld->element) | |
3022 && !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (infld)), | |
3023 DECL_SIZE (fld->element))) | |
3024 { | |
3025 fpos = size_binop (PLUS_EXPR, fpos, | |
3026 TYPE_SIZE (TREE_TYPE (infld))); | |
3027 fpos = size_binop (MINUS_EXPR, fpos, | |
3028 DECL_SIZE (fld->element)); | |
3029 } | |
3030 | |
3031 infld = fold_build3 (BIT_FIELD_REF, type, infld, flen, fpos); | |
3032 | |
3033 invar = size_binop (MINUS_EXPR, flp.field_pos, bpos); | |
3034 if (flp.overlap_pos) | |
3035 invar = size_binop (PLUS_EXPR, invar, flp.overlap_pos); | |
3036 invar = size_binop (PLUS_EXPR, invar, vpos); | |
3037 | |
3038 invar = fold_build3 (BIT_FIELD_REF, type, var, flen, invar); | |
3039 | |
3040 if (to_var) | |
3041 st = sra_build_bf_assignment (invar, infld); | |
3042 else | |
3043 st = sra_build_bf_assignment (infld, invar); | |
3044 | |
3045 gimple_seq_add_seq (seq_p, st); | |
3046 } | |
3047 else | |
3048 { | |
3049 tree sub = size_binop (MINUS_EXPR, flp.field_pos, bpos); | |
3050 sub = size_binop (PLUS_EXPR, vpos, sub); | |
3051 if (flp.overlap_pos) | |
3052 sub = size_binop (PLUS_EXPR, sub, flp.overlap_pos); | |
3053 | |
3054 sra_explode_bitfield_assignment (var, sub, to_var, seq_p, | |
3055 flen, fpos, fld); | |
3056 } | |
3057 } | |
3058 } | |
3059 | |
3060 /* Add to *SEQ_BEFORE_P statements that copy scalarized members of ELT | |
3061 that overlap with BIT_FIELD_REF<(ELT->element), BLEN, BPOS> back | |
3062 into the full variable, and to *SEQ_AFTER_P, if non-NULL, statements | |
3063 that copy the (presumably modified) overlapping portions of the | |
3064 full variable back to the scalarized variables. */ | |
3065 | |
3066 static void | |
3067 sra_sync_for_bitfield_assignment (gimple_seq *seq_before_p, | |
3068 gimple_seq *seq_after_p, | |
3069 tree blen, tree bpos, | |
3070 struct sra_elt *elt) | |
3071 { | |
3072 struct sra_elt *fld; | |
3073 struct bitfield_overlap_info flp; | |
3074 | |
3075 FOR_EACH_ACTUAL_CHILD (fld, elt) | |
3076 if (bitfield_overlaps_p (blen, bpos, fld, &flp)) | |
3077 { | |
3078 if (fld->replacement || (!flp.overlap_len && !flp.overlap_pos)) | |
3079 { | |
3080 generate_copy_inout (fld, false, generate_element_ref (fld), | |
3081 seq_before_p); | |
3082 mark_no_warning (fld); | |
3083 if (seq_after_p) | |
3084 generate_copy_inout (fld, true, generate_element_ref (fld), | |
3085 seq_after_p); | |
3086 } | |
3087 else | |
3088 { | |
3089 tree flen = flp.overlap_len ? flp.overlap_len : flp.field_len; | |
3090 tree fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0); | |
3091 | |
3092 sra_sync_for_bitfield_assignment (seq_before_p, seq_after_p, | |
3093 flen, fpos, fld); | |
3094 } | |
3095 } | |
3096 } | |
3097 | |
3098 /* Scalarize a USE. To recap, this is either a simple reference to ELT, | |
3099 if ELT is scalar, or some occurrence of ELT that requires a complete | |
3100 aggregate. IS_OUTPUT is true if ELT is being modified. */ | |
3101 | |
3102 static void | |
3103 scalarize_use (struct sra_elt *elt, tree *expr_p, gimple_stmt_iterator *gsi, | |
3104 bool is_output, bool use_all) | |
3105 { | |
3106 gimple stmt = gsi_stmt (*gsi); | |
3107 tree bfexpr; | |
3108 | |
3109 if (elt->replacement) | |
3110 { | |
3111 tree replacement = elt->replacement; | |
3112 | |
3113 /* If we have a replacement, then updating the reference is as | |
3114 simple as modifying the existing statement in place. */ | |
3115 if (is_output | |
3116 && TREE_CODE (elt->replacement) == BIT_FIELD_REF | |
3117 && is_gimple_reg (TREE_OPERAND (elt->replacement, 0)) | |
3118 && is_gimple_assign (stmt) | |
3119 && gimple_assign_lhs_ptr (stmt) == expr_p) | |
3120 { | |
3121 gimple_seq newseq; | |
3122 /* RHS must be a single operand. */ | |
3123 gcc_assert (gimple_assign_single_p (stmt)); | |
3124 newseq = sra_build_elt_assignment (elt, gimple_assign_rhs1 (stmt)); | |
3125 sra_replace (gsi, newseq); | |
3126 return; | |
3127 } | |
3128 else if (!is_output | |
3129 && TREE_CODE (elt->replacement) == BIT_FIELD_REF | |
3130 && is_gimple_assign (stmt) | |
3131 && gimple_assign_rhs1_ptr (stmt) == expr_p) | |
3132 { | |
3133 tree tmp = make_rename_temp | |
3134 (TREE_TYPE (gimple_assign_lhs (stmt)), "SR"); | |
3135 gimple_seq newseq = sra_build_assignment (tmp, REPLDUP (elt->replacement)); | |
3136 | |
3137 sra_insert_before (gsi, newseq); | |
3138 replacement = tmp; | |
3139 } | |
3140 if (is_output) | |
3141 mark_all_v_defs_stmt (stmt); | |
3142 *expr_p = REPLDUP (replacement); | |
3143 update_stmt (stmt); | |
3144 } | |
3145 else if (use_all && is_output | |
3146 && is_gimple_assign (stmt) | |
3147 && TREE_CODE (bfexpr | |
3148 = gimple_assign_lhs (stmt)) == BIT_FIELD_REF | |
3149 && &TREE_OPERAND (bfexpr, 0) == expr_p | |
3150 && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) | |
3151 && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) | |
3152 { | |
3153 gimple_seq seq_before = NULL; | |
3154 gimple_seq seq_after = NULL; | |
3155 tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); | |
3156 tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); | |
3157 bool update = false; | |
3158 | |
3159 if (!elt->use_block_copy) | |
3160 { | |
3161 tree type = TREE_TYPE (bfexpr); | |
3162 tree var = make_rename_temp (type, "SR"), tmp, vpos; | |
3163 gimple st; | |
3164 | |
3165 gimple_assign_set_lhs (stmt, var); | |
3166 update = true; | |
3167 | |
3168 if (!TYPE_UNSIGNED (type)) | |
3169 { | |
3170 type = unsigned_type_for (type); | |
3171 tmp = make_rename_temp (type, "SR"); | |
3172 st = gimple_build_assign (tmp, fold_convert (type, var)); | |
3173 gimple_seq_add_stmt (&seq_after, st); | |
3174 var = tmp; | |
3175 } | |
3176 | |
3177 /* If VAR is wider than BLEN bits, it is padded at the | |
3178 most-significant end. We want to set VPOS such that | |
3179 <BIT_FIELD_REF VAR BLEN VPOS> would refer to the | |
3180 least-significant BLEN bits of VAR. */ | |
3181 if (BYTES_BIG_ENDIAN) | |
3182 vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); | |
3183 else | |
3184 vpos = bitsize_int (0); | |
3185 sra_explode_bitfield_assignment | |
3186 (var, vpos, false, &seq_after, blen, bpos, elt); | |
3187 } | |
3188 else | |
3189 sra_sync_for_bitfield_assignment | |
3190 (&seq_before, &seq_after, blen, bpos, elt); | |
3191 | |
3192 if (seq_before) | |
3193 { | |
3194 mark_all_v_defs_seq (seq_before); | |
3195 sra_insert_before (gsi, seq_before); | |
3196 } | |
3197 if (seq_after) | |
3198 { | |
3199 mark_all_v_defs_seq (seq_after); | |
3200 sra_insert_after (gsi, seq_after); | |
3201 } | |
3202 | |
3203 if (update) | |
3204 update_stmt (stmt); | |
3205 } | |
3206 else if (use_all && !is_output | |
3207 && is_gimple_assign (stmt) | |
3208 && TREE_CODE (bfexpr | |
3209 = gimple_assign_rhs1 (stmt)) == BIT_FIELD_REF | |
3210 && &TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == expr_p | |
3211 && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) | |
3212 && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) | |
3213 { | |
3214 gimple_seq seq = NULL; | |
3215 tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); | |
3216 tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); | |
3217 bool update = false; | |
3218 | |
3219 if (!elt->use_block_copy) | |
3220 { | |
3221 tree type = TREE_TYPE (bfexpr); | |
3222 tree var = make_rename_temp (type, "SR"), tmp, vpos; | |
3223 gimple st = NULL; | |
3224 | |
3225 gimple_assign_set_rhs1 (stmt, var); | |
3226 update = true; | |
3227 | |
3228 if (!TYPE_UNSIGNED (type)) | |
3229 { | |
3230 type = unsigned_type_for (type); | |
3231 tmp = make_rename_temp (type, "SR"); | |
3232 st = gimple_build_assign (var, | |
3233 fold_convert (TREE_TYPE (var), tmp)); | |
3234 var = tmp; | |
3235 } | |
3236 | |
3237 gimple_seq_add_stmt (&seq, | |
3238 gimple_build_assign | |
3239 (var, build_int_cst_wide (type, 0, 0))); | |
3240 | |
3241 /* If VAR is wider than BLEN bits, it is padded at the | |
3242 most-significant end. We want to set VPOS such that | |
3243 <BIT_FIELD_REF VAR BLEN VPOS> would refer to the | |
3244 least-significant BLEN bits of VAR. */ | |
3245 if (BYTES_BIG_ENDIAN) | |
3246 vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); | |
3247 else | |
3248 vpos = bitsize_int (0); | |
3249 sra_explode_bitfield_assignment | |
3250 (var, vpos, true, &seq, blen, bpos, elt); | |
3251 | |
3252 if (st) | |
3253 gimple_seq_add_stmt (&seq, st); | |
3254 } | |
3255 else | |
3256 sra_sync_for_bitfield_assignment | |
3257 (&seq, NULL, blen, bpos, elt); | |
3258 | |
3259 if (seq) | |
3260 { | |
3261 mark_all_v_defs_seq (seq); | |
3262 sra_insert_before (gsi, seq); | |
3263 } | |
3264 | |
3265 if (update) | |
3266 update_stmt (stmt); | |
3267 } | |
3268 else | |
3269 { | |
3270 gimple_seq seq = NULL; | |
3271 | |
3272 /* Otherwise we need some copies. If ELT is being read, then we | |
3273 want to store all (modified) sub-elements back into the | |
3274 structure before the reference takes place. If ELT is being | |
3275 written, then we want to load the changed values back into | |
3276 our shadow variables. */ | |
3277 /* ??? We don't check modified for reads, we just always write all of | |
3278 the values. We should be able to record the SSA number of the VOP | |
3279 for which the values were last read. If that number matches the | |
3280 SSA number of the VOP in the current statement, then we needn't | |
3281 emit an assignment. This would also eliminate double writes when | |
3282 a structure is passed as more than one argument to a function call. | |
3283 This optimization would be most effective if sra_walk_function | |
3284 processed the blocks in dominator order. */ | |
3285 | |
3286 generate_copy_inout (elt, is_output, generate_element_ref (elt), &seq); | |
3287 if (seq == NULL) | |
3288 return; | |
3289 mark_all_v_defs_seq (seq); | |
3290 if (is_output) | |
3291 sra_insert_after (gsi, seq); | |
3292 else | |
3293 { | |
3294 sra_insert_before (gsi, seq); | |
3295 if (use_all) | |
3296 mark_no_warning (elt); | |
3297 } | |
3298 } | |
3299 } | |
3300 | |
3301 /* Scalarize a COPY. To recap, this is an assignment statement between | |
3302 two scalarizable references, LHS_ELT and RHS_ELT. */ | |

static void
scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
                gimple_stmt_iterator *gsi)
{
  gimple_seq seq;
  gimple stmt;

  if (lhs_elt->replacement && rhs_elt->replacement)
    {
      /* If we have two scalar operands, modify the existing statement.  */
      stmt = gsi_stmt (*gsi);

      /* See the commentary in sra_walk_function concerning
         RETURN_EXPR, and why we should never see one here.  */
      gcc_assert (is_gimple_assign (stmt));
      gcc_assert (gimple_assign_copy_p (stmt));

      gimple_assign_set_lhs (stmt, lhs_elt->replacement);
      gimple_assign_set_rhs1 (stmt, REPLDUP (rhs_elt->replacement));
      update_stmt (stmt);
    }
  else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
    {
      /* If either side requires a block copy, then sync the RHS back
         to the original structure, leave the original assignment
         statement (which will perform the block copy), then load the
         LHS values out of its now-updated original structure.  */
      /* ??? Could perform a modified pair-wise element copy.  That
         would at least allow those elements that are instantiated in
         both structures to be optimized well.  */
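      /* As a sketch of this path (the "$" names are hypothetical):
         for "a = b;" where B needs a block copy, the pass emits

           b.x = b$x;    (sync instantiated RHS elements)
           a = b;        (original block copy, left in place)
           a$x = a.x;    (reload instantiated LHS elements)  */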

      seq = NULL;
      generate_copy_inout (rhs_elt, false,
                           generate_element_ref (rhs_elt), &seq);
      if (seq)
        {
          mark_all_v_defs_seq (seq);
          sra_insert_before (gsi, seq);
        }

      seq = NULL;
      generate_copy_inout (lhs_elt, true,
                           generate_element_ref (lhs_elt), &seq);
      if (seq)
        {
          mark_all_v_defs_seq (seq);
          sra_insert_after (gsi, seq);
        }
    }
  else
    {
      /* Otherwise both sides must be fully instantiated, in which
         case we perform pair-wise element assignments and replace the
         original block copy statement.  */

      stmt = gsi_stmt (*gsi);
      mark_all_v_defs_stmt (stmt);

      seq = NULL;
      generate_element_copy (lhs_elt, rhs_elt, &seq);
      gcc_assert (seq);
      mark_all_v_defs_seq (seq);
      sra_replace (gsi, seq);
    }
}

/* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
   reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
   COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
   CONSTRUCTOR.  */
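/* For illustration (hypothetical names again): with a fully
   instantiated LHS,

     struct pt { int x; int y; } a;
     a = (struct pt) { .x = 5 };

   is rewritten as

     a$y = 0;
     a$x = 5;

   the zero store coming from the CONSTRUCTOR rule below that members
   not mentioned are zero-initialized.  */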

static void
scalarize_init (struct sra_elt *lhs_elt, tree rhs, gimple_stmt_iterator *gsi)
{
  bool result = true;
  gimple_seq seq = NULL, init_seq = NULL;

  /* Generate initialization statements for all members extant in the RHS.  */
  if (rhs)
    {
      /* Unshare the expression just in case this is from a decl's initial.  */
      rhs = unshare_expr (rhs);
      result = generate_element_init (lhs_elt, rhs, &init_seq);
    }

  if (!result)
    {
      /* If we failed to convert the entire initializer, then we must
         leave the structure assignment in place and must load values
         from the structure into the slots for which we did not find
         constants.  The easiest way to do this is to generate a complete
         copy-out, and then follow that with the constant assignments
         that we were able to build.  DCE will clean things up.  */
      gimple_seq seq0 = NULL;
      generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
                           &seq0);
      gimple_seq_add_seq (&seq0, seq);
      seq = seq0;
    }
  else
    {
      /* CONSTRUCTOR is defined such that any member not mentioned is assigned
         a zero value.  Initialize the rest of the instantiated elements.  */
      generate_element_zero (lhs_elt, &seq);
      gimple_seq_add_seq (&seq, init_seq);
    }

  if (lhs_elt->use_block_copy || !result)
    {
      /* Since LHS is not fully instantiated, we must leave the structure
         assignment in place.  Treating this case differently from a USE
         exposes constants to later optimizations.  */
      if (seq)
        {
          mark_all_v_defs_seq (seq);
          sra_insert_after (gsi, seq);
        }
    }
  else
    {
      /* The LHS is fully instantiated.  The list of initializations
         replaces the original structure assignment.  */
      gcc_assert (seq);
      mark_all_v_defs_stmt (gsi_stmt (*gsi));
      mark_all_v_defs_seq (seq);
      sra_replace (gsi, seq);
    }
}

/* A subroutine of scalarize_ldst called via walk_tree.  Set
   TREE_THIS_NOTRAP on all INDIRECT_REFs.  */

static tree
mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;

  if (TREE_CODE (t) == INDIRECT_REF)
    {
      TREE_THIS_NOTRAP (t) = 1;
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (t))
    *walk_subtrees = 0;

  return NULL;
}

/* Scalarize a LDST.  To recap, this is an assignment between one
   scalarizable reference ELT and one non-scalarizable reference OTHER.
   IS_OUTPUT is true if ELT is on the left-hand side.  */
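/* For illustration (hypothetical names): with ELT fully instantiated,
   a store such as

     *p = s;

   becomes the element-wise stores

     p->x = s$x;
     p->y = s$y;

   and symmetrically for a load "s = *p;", avoiding the block copy.  */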

static void
scalarize_ldst (struct sra_elt *elt, tree other,
                gimple_stmt_iterator *gsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
         block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, gsi, is_output, false);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
         case we can have each element stored/loaded directly to/from the
         corresponding slot in OTHER.  This avoids a block copy.  */

      gimple_seq seq = NULL;
      gimple stmt = gsi_stmt (*gsi);

      mark_all_v_defs_stmt (stmt);
      generate_copy_inout (elt, is_output, other, &seq);
      gcc_assert (seq);
      mark_all_v_defs_seq (seq);

      /* Preserve EH semantics.  */
      if (stmt_ends_bb_p (stmt))
        {
          gimple_stmt_iterator si;
          gimple first;
          gimple_seq blist = NULL;
          bool thr = stmt_could_throw_p (stmt);

          /* If the last statement of this BB created an EH edge
             before scalarization, we have to locate the first
             statement that can throw in the new statement list and
             use that as the last statement of this BB, such that EH
             semantics are preserved.  All statements up to this one
             are added to the same BB.  All other statements in the
             list will be added to normal outgoing edges of the same
             BB.  If they access any memory, it's the same memory, so
             we can assume they won't throw.  */
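          /* Concretely (hypothetical names): if "*p = s;" ended its
             block with an EH edge, the first potentially-throwing
             store, say "p->x = s$x;", becomes the new block
             terminator, and the remaining stores are copied onto the
             outgoing edges below.  */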
          si = gsi_start (seq);
          for (first = gsi_stmt (si);
               thr && !gsi_end_p (si) && !stmt_could_throw_p (first);
               first = gsi_stmt (si))
            {
              gsi_remove (&si, false);
              gimple_seq_add_stmt (&blist, first);
            }

          /* Extract the first remaining statement from LIST; this is
             the EH statement if there is one.  */
          gsi_remove (&si, false);

          if (blist)
            sra_insert_before (gsi, blist);

          /* Replace the old statement with this new representative.  */
          gsi_replace (gsi, first, true);

          if (!gsi_end_p (si))
            {
              /* If any reference would trap, then they all would.  And more
                 to the point, the first would.  Therefore none of the rest
                 will trap since the first didn't.  Indicate this by
                 iterating over the remaining statements and setting
                 TREE_THIS_NOTRAP in all INDIRECT_REFs.  */
              do
                {
                  walk_gimple_stmt (&si, NULL, mark_notrap, NULL);
                  gsi_next (&si);
                }
              while (!gsi_end_p (si));

              insert_edge_copies_seq (seq, gsi_bb (*gsi));
            }
        }
      else
        sra_replace (gsi, seq);
    }
}

/* Generate initializations for all scalarizable parameters.  */
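/* For illustration (hypothetical names): for a scalarized parameter P,
   the copy-in emitted on the edge leaving the entry block is

     p$x = p.x;
     p$y = p.y;

   so the replacement variables start out holding the incoming
   values.  */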

static void
scalarize_parms (void)
{
  gimple_seq seq = NULL;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      generate_copy_inout (elt, true, var, &seq);
    }

  if (seq)
    {
      insert_edge_copies_seq (seq, ENTRY_BLOCK_PTR);
      mark_all_v_defs_seq (seq);
    }
}

/* Entry point to phase 4.  Update the function to match replacements.  */

static void
scalarize_function (void)
{
  static const struct sra_walk_fns fns = {
    scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
  };

  sra_walk_function (&fns);
  scalarize_parms ();
  gsi_commit_edge_inserts ();
}


/* Debug helper function.  Print ELT in a nice human-readable format.  */
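/* For example, field B of variable A prints as "a.b", array element 3
   of A as "a[3]", a bit-field of size 8 at bit 0 as "$B0F8", and the
   real part of a complex variable C as "__real__ c".  */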

static void
dump_sra_elt_name (FILE *f, struct sra_elt *elt)
{
  if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
    {
      fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
      dump_sra_elt_name (f, elt->parent);
    }
  else
    {
      if (elt->parent)
        dump_sra_elt_name (f, elt->parent);
      if (DECL_P (elt->element))
        {
          if (TREE_CODE (elt->element) == FIELD_DECL)
            fputc ('.', f);
          print_generic_expr (f, elt->element, dump_flags);
        }
      else if (TREE_CODE (elt->element) == BIT_FIELD_REF)
        fprintf (f, "$B" HOST_WIDE_INT_PRINT_DEC "F" HOST_WIDE_INT_PRINT_DEC,
                 tree_low_cst (TREE_OPERAND (elt->element, 2), 1),
                 tree_low_cst (TREE_OPERAND (elt->element, 1), 1));
      else if (TREE_CODE (elt->element) == RANGE_EXPR)
        fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC ".." HOST_WIDE_INT_PRINT_DEC "]",
                 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)),
                 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1)));
      else
        fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
                 TREE_INT_CST_LOW (elt->element));
    }
}

/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}

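/* Allocate the type-decomposition and type-instantiation caches on
   first use.  */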
void
sra_init_cache (void)
{
  if (sra_type_decomp_cache)
    return;

  sra_type_decomp_cache = BITMAP_ALLOC (NULL);
  sra_type_inst_cache = BITMAP_ALLOC (NULL);
}


/* Main entry point.  */

static unsigned int
tree_sra (void)
{
  /* Initialize local variables.  */
  todoflags = 0;
  gcc_obstack_init (&sra_obstack);
  sra_candidates = BITMAP_ALLOC (NULL);
  needs_copy_in = BITMAP_ALLOC (NULL);
  sra_init_cache ();
  sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);

  /* Scan.  If we find anything, instantiate and scalarize.  */
  if (find_candidates_for_sra ())
    {
      scan_function ();
      decide_instantiations ();
      scalarize_function ();
      if (!bitmap_empty_p (sra_candidates))
        todoflags |= TODO_rebuild_alias;
    }

  /* Free allocated memory.  */
  htab_delete (sra_map);
  sra_map = NULL;
  BITMAP_FREE (sra_candidates);
  BITMAP_FREE (needs_copy_in);
  BITMAP_FREE (sra_type_decomp_cache);
  BITMAP_FREE (sra_type_inst_cache);
  obstack_free (&sra_obstack, NULL);
  return todoflags;
}

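/* Entry point for the early pass: run SRA with EARLY_SRA set, and
   strip TODO_rebuild_alias from the returned todo flags.  */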
static unsigned int
tree_sra_early (void)
{
  unsigned int ret;

  early_sra = true;
  ret = tree_sra ();
  early_sra = false;

  return ret & ~TODO_rebuild_alias;
}

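/* Gate: run the pass only when -ftree-sra is enabled.  */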
static bool
gate_sra (void)
{
  return flag_tree_sra != 0;
}

struct gimple_opt_pass pass_sra_early =
{
 {
  GIMPLE_PASS,
  "esra",                               /* name */
  gate_sra,                             /* gate */
  tree_sra_early,                       /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_SRA,                          /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func
  | TODO_update_ssa
  | TODO_ggc_collect
  | TODO_verify_ssa                     /* todo_flags_finish */
 }
};

struct gimple_opt_pass pass_sra =
{
 {
  GIMPLE_PASS,
  "sra",                                /* name */
  gate_sra,                             /* gate */
  tree_sra,                             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_SRA,                          /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func
  | TODO_update_ssa
  | TODO_ggc_collect
  | TODO_verify_ssa                     /* todo_flags_finish */
 }
};