comparison gcc/matrix-reorg.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /*
23 Matrix flattening optimization tries to replace an N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
26
27 When M==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
30
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
34
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
37
38 a = (int *) malloc (dim1 * dim2 * sizeof(int));
39
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
41
42 The two main phases of the optimization are the analysis
43 and transformation.
44 The driver of the optimization is matrix_reorg ().
45
46
47
48 Analysis phase:
49 ===============
50
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54 level of an N-dimensional matrix (K <= N), which allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
57
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
60
61 Transformation phase:
62 =====================
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
67
68 Matrix Transposing
69 ==================
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
73
74 For example, let's look at the matrix accesses in the following loop:
75
76 for (i=0; i<N; i++)
77 for (j=0; j<M; j++)
78 access to a[i][j]
79
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
82
83 However, if the accesses of the matrix were of the following form:
84
85 for (i=0; i<N; i++)
86 for (j=0; j<M; j++)
87 access to a[j][i]
88
89 In this loop we iterate over the columns and not the rows.
90 Therefore, swapping the rows and columns
91 would give an organization with better (cache) locality.
92 Replacing the dimensions of the matrix is called matrix transposing.
93
94 This example, of course, extends to matrices with more dimensions
95 as well.
96
97 Since a program could include all kinds of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which applies a
99 heuristic to determine whether or not to transpose the matrix,
100 according to the form of the most dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
103
104 This decision making is based on profiling information and loop information.
105 If profiling information is available, the decision-making mechanism is
106 used; otherwise the matrix is only flattened (if possible).
107
108 Both optimizations are described in the paper "Matrix flattening and
109 transposing in GCC" which was presented at the GCC Summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
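
As an illustrative sketch of the flattening described above (hypothetical code, not taken from this pass; the names a, a_flat, dim1 and dim2 are assumptions), the source-level effect on a fully flattened two-dimensional matrix would be roughly:

    #include <stdlib.h>

    static int dim1 = 64, dim2 = 128;   /* hypothetical global dimensions */

    int
    main (void)
    {
      int i;

      /* Original allocation: an array of row pointers plus dim1 rows.  */
      int **a = (int **) malloc (dim1 * sizeof (int *));
      for (i = 0; i < dim1; i++)
        a[i] = (int *) malloc (dim2 * sizeof (int));
      a[3][5] = 7;

      /* After flattening: one allocation; a[i][j] becomes
         a_flat[i * dim2 + j].  */
      int *a_flat = (int *) malloc (dim1 * dim2 * sizeof (int));
      a_flat[3 * dim2 + 5] = 7;

      for (i = 0; i < dim1; i++)
        free (a[i]);
      free (a);
      free (a_flat);
      return 0;
    }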
111
112 #include "config.h"
113 #include "system.h"
114 #include "coretypes.h"
115 #include "tm.h"
116 #include "tree.h"
117 #include "rtl.h"
118 #include "c-tree.h"
119 #include "tree-inline.h"
120 #include "tree-flow.h"
121 #include "tree-flow-inline.h"
122 #include "langhooks.h"
123 #include "hashtab.h"
124 #include "toplev.h"
125 #include "flags.h"
126 #include "ggc.h"
127 #include "debug.h"
128 #include "target.h"
129 #include "cgraph.h"
130 #include "diagnostic.h"
131 #include "timevar.h"
132 #include "params.h"
133 #include "fibheap.h"
134 #include "c-common.h"
135 #include "intl.h"
136 #include "function.h"
137 #include "basic-block.h"
138 #include "cfgloop.h"
139 #include "tree-iterator.h"
140 #include "tree-pass.h"
141 #include "opts.h"
142 #include "tree-data-ref.h"
143 #include "tree-chrec.h"
144 #include "tree-scalar-evolution.h"
145
146 /* We need to collect a lot of data from the original malloc,
147 particularly as the gimplifier has converted:
148
149 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
150
151 into
152
153 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
154 T4 = malloc (T3);
155 T5 = (struct_type *) T4;
156 orig_var = T5;
157
158 The following struct fields allow us to collect all the necessary data from
159 the gimplified program. The comments in the struct below are all based
160 on the gimple example above. */
161
162 struct malloc_call_data
163 {
164 gimple call_stmt; /* Tree for "T4 = malloc (T3);" */
165 tree size_var; /* Var decl for T3. */
166 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
167 };
168
169 static tree can_calculate_expr_before_stmt (tree, sbitmap);
170 static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
171
172 /* The front end of the compiler, when parsing statements of the form:
173
174 var = (type_cast) malloc (sizeof (type));
175
176 always converts this single statement into the following statements
177 (GIMPLE form):
178
179 T.1 = sizeof (type);
180 T.2 = malloc (T.1);
181 T.3 = (type_cast) T.2;
182 var = T.3;
183
184 Since we need to create new malloc statements and modify the original
185 statements somewhat, we need to find all four of the above statements.
186 Currently record_call_1 (called for building cgraph edges) finds and
187 records the statements containing the actual call to malloc, but we
188 need to find the rest of the variables/statements on our own. That
189 is what the following function does. */
190 static void
191 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
192 {
193 tree size_var = NULL;
194 tree malloc_fn_decl;
195 tree arg1;
196
197 gcc_assert (is_gimple_call (stmt));
198
199 malloc_fn_decl = gimple_call_fndecl (stmt);
200 if (malloc_fn_decl == NULL
201 || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
202 return;
203
204 arg1 = gimple_call_arg (stmt, 0);
205 size_var = arg1;
206
207 m_data->call_stmt = stmt;
208 m_data->size_var = size_var;
209 if (TREE_CODE (size_var) != VAR_DECL)
210 m_data->malloc_size = size_var;
211 else
212 m_data->malloc_size = NULL_TREE;
213 }
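
A minimal usage sketch (hypothetical; stmt stands for the "T4 = malloc (T3);" statement from the comment above), restating what the function records:

    struct malloc_call_data mcd;
    collect_data_for_malloc_call (stmt, &mcd);
    /* mcd.call_stmt is stmt, and mcd.size_var is the malloc argument (T3).
       mcd.malloc_size is set to the argument only when it is not a
       VAR_DECL, and is left NULL_TREE otherwise.  */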
214
215 /* Information about matrix access site.
216 For example: if an access site of matrix arr is arr[i][j]
217 the ACCESS_SITE_INFO structure will have the address
218 of arr as its stmt. The INDEX_INFO will hold information about the
219 initial address and index of each dimension. */
220 struct access_site_info
221 {
222 /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR). */
223 gimple stmt;
224
225 /* In case of POINTER_PLUS_EXPR, what is the offset. */
226 tree offset;
227
228 /* The index which created the offset. */
229 tree index;
230
231 /* The indirection level of this statement. */
232 int level;
233
234 /* TRUE for an allocation site, FALSE for an access site. */
235 bool is_alloc;
236
237 /* The function containing the access site. */
238 tree function_decl;
239
240 /* This access is iterated in the innermost loop. */
241 bool iterated_by_inner_most_loop_p;
242 };
243
244 typedef struct access_site_info *access_site_info_p;
245 DEF_VEC_P (access_site_info_p);
246 DEF_VEC_ALLOC_P (access_site_info_p, heap);
247
248 /* Information about matrix to flatten. */
249 struct matrix_info
250 {
251 /* Decl tree of this matrix. */
252 tree decl;
253 /* Number of dimensions; number
254 of "*" in the type declaration. */
255 int num_dims;
256
257 /* Minimum indirection level that escapes, 0 means that
258 the whole matrix escapes, k means that dimensions
259 0 to ACTUAL_DIM - k escape. */
260 int min_indirect_level_escape;
261
262 gimple min_indirect_level_escape_stmt;
263
264 /* Hold the allocation site for each level (dimension).
265 We can use NUM_DIMS as the upper bound and allocate the array
266 once with this number of elements and no need to use realloc and
267 MAX_MALLOCED_LEVEL. */
268 gimple *malloc_for_level;
269
270 int max_malloced_level;
271
272 /* Is the matrix transposed. */
273 bool is_transposed_p;
274
275 /* The location of the allocation sites (they must be in one
276 function). */
277 tree allocation_function_decl;
278
279 /* The calls to free for each level of indirection. */
280 struct free_info
281 {
282 gimple stmt;
283 tree func;
284 } *free_stmts;
285
286 /* An array which holds for each dimension its size, where
287 dimension 0 is the outermost (the one that contains all the others).
288 */
289 tree *dimension_size;
290
291 /* An array which holds for each dimension its original size
292 (before transposing and flattening take place). */
293 tree *dimension_size_orig;
294
295 /* An array which holds for each dimension the size of the type
296 of elements accessed at that level (in bytes). */
297 HOST_WIDE_INT *dimension_type_size;
298
299 int dimension_type_size_len;
300
301 /* An array collecting the count of accesses for each dimension. */
302 gcov_type *dim_hot_level;
303
304 /* An array of the accesses to be flattened.
305 elements are of type "struct access_site_info *". */
306 VEC (access_site_info_p, heap) * access_l;
307
308 /* A map of how the dimensions will be organized at the end of
309 the analyses. */
310 int *dim_map;
311 };
312
313 /* In each phi node we want to record the indirection level we have when we
314 get to the phi node. Usually we will have phi nodes with more than two
315 arguments; we must then ensure that all of them reach the phi node with
316 the same indirection level, otherwise it's not safe to do the flattening.
317 So we record the information regarding the indirection level each time we
318 get to the phi node in this hash table. */
319
320 struct matrix_access_phi_node
321 {
322 gimple phi;
323 int indirection_level;
324 };
325
326 /* We use this structure to find if the SSA variable is accessed inside the
327 tree and record the tree containing it. */
328
329 struct ssa_acc_in_tree
330 {
331 /* The variable whose accesses in the tree we are looking for. */
332 tree ssa_var;
333 /* The tree in which the ssa_var is accessed, and its code; currently
334 it could be an INDIRECT_REF or CALL_EXPR. */
335 enum tree_code t_code;
336 tree t_tree;
337 /* The place in the containing tree. */
338 tree *tp;
339 tree second_op;
340 bool var_found;
341 };
342
343 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
344 sbitmap, bool);
345 static int transform_allocation_sites (void **, void *);
346 static int transform_access_sites (void **, void *);
347 static int analyze_transpose (void **, void *);
348 static int dump_matrix_reorg_analysis (void **, void *);
349
350 static bool check_transpose_p;
351
352 /* Hash function used for the phi nodes. */
353
354 static hashval_t
355 mat_acc_phi_hash (const void *p)
356 {
357 const struct matrix_access_phi_node *const ma_phi =
358 (const struct matrix_access_phi_node *) p;
359
360 return htab_hash_pointer (ma_phi->phi);
361 }
362
363 /* Equality means phi node pointers are the same. */
364
365 static int
366 mat_acc_phi_eq (const void *p1, const void *p2)
367 {
368 const struct matrix_access_phi_node *const phi1 =
369 (const struct matrix_access_phi_node *) p1;
370 const struct matrix_access_phi_node *const phi2 =
371 (const struct matrix_access_phi_node *) p2;
372
373 if (phi1->phi == phi2->phi)
374 return 1;
375
376 return 0;
377 }
378
379 /* Hold the PHI nodes we visit during the traversal for escaping
380 analysis. */
381 static htab_t htab_mat_acc_phi_nodes = NULL;
382
383 /* This hash-table holds the information about the matrices we are
384 going to handle. */
385 static htab_t matrices_to_reorg = NULL;
386
387 /* Return a hash for MTT, which is really a "matrix_info *". */
388 static hashval_t
389 mtt_info_hash (const void *mtt)
390 {
391 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
392 }
393
394 /* Return true if MTT1 and MTT2 (which are really both of type
395 "matrix_info *") refer to the same decl. */
396 static int
397 mtt_info_eq (const void *mtt1, const void *mtt2)
398 {
399 const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
400 const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
401
402 if (i1->decl == i2->decl)
403 return true;
404
405 return false;
406 }
407
408 /* Return false if STMT may contain a vector expression.
409 In this situation, no matrices should be flattened. */
410 static bool
411 may_flatten_matrices_1 (gimple stmt)
412 {
413 tree t;
414
415 switch (gimple_code (stmt))
416 {
417 case GIMPLE_ASSIGN:
418 if (!gimple_assign_cast_p (stmt))
419 return true;
420
421 t = gimple_assign_rhs1 (stmt);
422 while (CONVERT_EXPR_P (t))
423 {
424 if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
425 {
426 tree pointee;
427
428 pointee = TREE_TYPE (t);
429 while (POINTER_TYPE_P (pointee))
430 pointee = TREE_TYPE (pointee);
431 if (TREE_CODE (pointee) == VECTOR_TYPE)
432 {
433 if (dump_file)
434 fprintf (dump_file,
435 "Found vector type, don't flatten matrix\n");
436 return false;
437 }
438 }
439 t = TREE_OPERAND (t, 0);
440 }
441 break;
442 case GIMPLE_ASM:
443 /* Asm code could contain vector operations. */
444 return false;
445 break;
446 default:
447 break;
448 }
449 return true;
450 }
451
452 /* Return false if there are hand-written vectors in the program.
453 We disable the flattening in such a case. */
454 static bool
455 may_flatten_matrices (struct cgraph_node *node)
456 {
457 tree decl;
458 struct function *func;
459 basic_block bb;
460 gimple_stmt_iterator gsi;
461
462 decl = node->decl;
463 if (node->analyzed)
464 {
465 func = DECL_STRUCT_FUNCTION (decl);
466 FOR_EACH_BB_FN (bb, func)
467 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
468 if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
469 return false;
470 }
471 return true;
472 }
473
474 /* Given a VAR_DECL, check its type to determine whether it is
475 a definition of a dynamically allocated matrix and therefore is
476 a suitable candidate for the matrix flattening optimization.
477 Return NULL if VAR_DECL is not such a decl. Otherwise, allocate
478 a MATRIX_INFO structure, fill it with the relevant information
479 and return a pointer to it.
480 TODO: also handle statically defined arrays. */
481 static struct matrix_info *
482 analyze_matrix_decl (tree var_decl)
483 {
484 struct matrix_info *m_node, tmpmi, *mi;
485 tree var_type;
486 int dim_num = 0;
487
488 gcc_assert (matrices_to_reorg);
489
490 if (TREE_CODE (var_decl) == PARM_DECL)
491 var_type = DECL_ARG_TYPE (var_decl);
492 else if (TREE_CODE (var_decl) == VAR_DECL)
493 var_type = TREE_TYPE (var_decl);
494 else
495 return NULL;
496
497 if (!POINTER_TYPE_P (var_type))
498 return NULL;
499
500 while (POINTER_TYPE_P (var_type))
501 {
502 var_type = TREE_TYPE (var_type);
503 dim_num++;
504 }
505
506 if (dim_num <= 1)
507 return NULL;
508
509 if (!COMPLETE_TYPE_P (var_type)
510 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
511 return NULL;
512
513 /* Check to see if this pointer is already in there. */
514 tmpmi.decl = var_decl;
515 mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
516
517 if (mi)
518 return NULL;
519
520 /* Record the matrix. */
521
522 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
523 m_node->decl = var_decl;
524 m_node->num_dims = dim_num;
525 m_node->free_stmts
526 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
527
528 /* Init min_indirect_level_escape to -1 to indicate that no escape
529 analysis has been done yet. */
530 m_node->min_indirect_level_escape = -1;
531 m_node->is_transposed_p = false;
532
533 return m_node;
534 }
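
For example (illustrative only): a global declared as "int ***m;" is accepted by analyze_matrix_decl with num_dims == 3, since its type strips down through three pointer levels to a complete "int"; a global "int *v;" is rejected by the dim_num <= 1 check, and a pointer to an incomplete type is rejected by the COMPLETE_TYPE_P test.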
535
536 /* Free matrix E. */
537 static void
538 mat_free (void *e)
539 {
540 struct matrix_info *mat = (struct matrix_info *) e;
541
542 if (!mat)
543 return;
544
545 if (mat->free_stmts)
546 free (mat->free_stmts);
547 if (mat->dim_hot_level)
548 free (mat->dim_hot_level);
549 if (mat->malloc_for_level)
550 free (mat->malloc_for_level);
551 }
552
553 /* Find all potential matrices.
554 TODO: currently we handle only multidimensional
555 dynamically allocated arrays. */
556 static void
557 find_matrices_decl (void)
558 {
559 struct matrix_info *tmp;
560 PTR *slot;
561 struct varpool_node *vnode;
562
563 gcc_assert (matrices_to_reorg);
564
565 /* For every global variable in the program:
566 Check to see if it's of a candidate type and record it. */
567 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
568 {
569 tree var_decl = vnode->decl;
570
571 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
572 continue;
573
574 if (matrices_to_reorg)
575 if ((tmp = analyze_matrix_decl (var_decl)))
576 {
577 if (!TREE_ADDRESSABLE (var_decl))
578 {
579 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
580 *slot = tmp;
581 }
582 }
583 }
584 return;
585 }
586
587 /* Mark that the matrix MI escapes at level L. */
588 static void
589 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
590 {
591 if (mi->min_indirect_level_escape == -1
592 || (mi->min_indirect_level_escape > l))
593 {
594 mi->min_indirect_level_escape = l;
595 mi->min_indirect_level_escape_stmt = s;
596 }
597 }
598
599 /* Find if the SSA variable is accessed inside the
600 tree and record the tree containing it.
601 The only relevant uses are the case of SSA_NAME, or SSA inside
602 INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
603 static void
604 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
605 {
606 a->t_code = TREE_CODE (t);
607 switch (a->t_code)
608 {
609 case SSA_NAME:
610 if (t == a->ssa_var)
611 a->var_found = true;
612 break;
613 case INDIRECT_REF:
614 if (SSA_VAR_P (TREE_OPERAND (t, 0))
615 && TREE_OPERAND (t, 0) == a->ssa_var)
616 a->var_found = true;
617 break;
618 default:
619 break;
620 }
621 }
622
623 /* Find if the SSA variable is accessed on the right hand side of
624 gimple call STMT. */
625
626 static void
627 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
628 {
629 tree decl;
630 tree arg;
631 size_t i;
632
633 a->t_code = CALL_EXPR;
634 for (i = 0; i < gimple_call_num_args (stmt); i++)
635 {
636 arg = gimple_call_arg (stmt, i);
637 if (arg == a->ssa_var)
638 {
639 a->var_found = true;
640 decl = gimple_call_fndecl (stmt);
641 a->t_tree = decl;
642 break;
643 }
644 }
645 }
646
647 /* Find if the SSA variable is accessed on the right hand side of
648 gimple assign STMT. */
649
650 static void
651 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
652 {
653
654 a->t_code = gimple_assign_rhs_code (stmt);
655 switch (a->t_code)
656 {
657 tree op1, op2;
658
659 case SSA_NAME:
660 case INDIRECT_REF:
661 CASE_CONVERT:
662 case VIEW_CONVERT_EXPR:
663 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
664 break;
665 case POINTER_PLUS_EXPR:
666 case PLUS_EXPR:
667 case MULT_EXPR:
668 op1 = gimple_assign_rhs1 (stmt);
669 op2 = gimple_assign_rhs2 (stmt);
670
671 if (op1 == a->ssa_var)
672 {
673 a->var_found = true;
674 a->second_op = op2;
675 }
676 else if (op2 == a->ssa_var)
677 {
678 a->var_found = true;
679 a->second_op = op1;
680 }
681 break;
682 default:
683 break;
684 }
685 }
686
687 /* Record the access/allocation site information for matrix MI so we can
688 handle it later in transformation. */
689 static void
690 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
691 tree index, int level, bool is_alloc)
692 {
693 struct access_site_info *acc_info;
694
695 if (!mi->access_l)
696 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
697
698 acc_info
699 = (struct access_site_info *)
700 xcalloc (1, sizeof (struct access_site_info));
701 acc_info->stmt = stmt;
702 acc_info->offset = offset;
703 acc_info->index = index;
704 acc_info->function_decl = current_function_decl;
705 acc_info->level = level;
706 acc_info->is_alloc = is_alloc;
707
708 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
709
710 }
711
712 /* Record the malloc as the allocation site of the given LEVEL. But
713 first we make sure that all the size parameters passed to malloc in
714 all the allocation sites can be pre-calculated before the call to
715 the malloc of level 0 (the main malloc call). */
716 static void
717 add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
718 {
719 struct malloc_call_data mcd;
720
721 /* Make sure that the allocation sites are in the same function. */
722 if (!mi->allocation_function_decl)
723 mi->allocation_function_decl = current_function_decl;
724 else if (mi->allocation_function_decl != current_function_decl)
725 {
726 int min_malloc_level;
727
728 gcc_assert (mi->malloc_for_level);
729
730 /* Find the minimum malloc level that has already been seen;
731 we know its allocation function must be
732 MI->allocation_function_decl since it is different from
733 CURRENT_FUNCTION_DECL; the escaping level should then be
734 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
735 must be set accordingly. */
736 for (min_malloc_level = 0;
737 min_malloc_level < mi->max_malloced_level
738 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
739 if (level < min_malloc_level)
740 {
741 mi->allocation_function_decl = current_function_decl;
742 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
743 }
744 else
745 {
746 mark_min_matrix_escape_level (mi, level, stmt);
747 /* It cannot be that (level == min_malloc_level);
748 we would have returned earlier. */
749 return;
750 }
751 }
752
753 /* Find the correct malloc information. */
754 collect_data_for_malloc_call (stmt, &mcd);
755
756 /* We accept only calls to the malloc function; we do not accept
757 calls like calloc and realloc. */
758 if (!mi->malloc_for_level)
759 {
760 mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
761 mi->max_malloced_level = level + 1;
762 }
763 else if (mi->max_malloced_level <= level)
764 {
765 mi->malloc_for_level
766 = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
767
768 /* Zero the newly allocated items. */
769 memset (&(mi->malloc_for_level[mi->max_malloced_level]),
770 0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));
771
772 mi->max_malloced_level = level + 1;
773 }
774 mi->malloc_for_level[level] = stmt;
775 }
776
777 /* Given an assignment statement STMT whose left-hand side is known
778 to be the matrix MI variable, we traverse the immediate
779 uses backwards until we get to a malloc site. We make sure that
780 there is one and only one malloc site that sets this variable. When
781 we are performing the flattening we generate a new variable that
782 will hold the size for each dimension; each malloc that allocates a
783 dimension has the size parameter; we use that parameter to
784 initialize the dimension size variable so we can use it later in
785 the address calculations. LEVEL is the dimension we're inspecting.
786 Return if STMT is related to an allocation site. */
787
788 static void
789 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
790 int level, sbitmap visited)
791 {
792 if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
793 {
794 tree rhs = gimple_assign_rhs1 (stmt);
795
796 if (TREE_CODE (rhs) == SSA_NAME)
797 {
798 gimple def = SSA_NAME_DEF_STMT (rhs);
799
800 analyze_matrix_allocation_site (mi, def, level, visited);
801 return;
802 }
803 /* If we are back to the original matrix variable then we
804 are sure that this is analyzed as an access site. */
805 else if (rhs == mi->decl)
806 return;
807 }
808 /* A result of call to malloc. */
809 else if (is_gimple_call (stmt))
810 {
811 int call_flags = gimple_call_flags (stmt);
812
813 if (!(call_flags & ECF_MALLOC))
814 {
815 mark_min_matrix_escape_level (mi, level, stmt);
816 return;
817 }
818 else
819 {
820 tree malloc_fn_decl;
821 const char *malloc_fname;
822
823 malloc_fn_decl = gimple_call_fndecl (stmt);
824 if (malloc_fn_decl == NULL_TREE)
825 {
826 mark_min_matrix_escape_level (mi, level, stmt);
827 return;
828 }
829 malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
830 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
831 {
832 if (dump_file)
833 fprintf (dump_file,
834 "Matrix %s is an argument to function %s\n",
835 get_name (mi->decl), get_name (malloc_fn_decl));
836 mark_min_matrix_escape_level (mi, level, stmt);
837 return;
838 }
839 }
840 /* This is a call to malloc of level 'level'.
841 mi->max_malloced_level-1 == level means that we've
842 seen a malloc statement of level 'level' before.
843 If the statement is not the same one that we've
844 seen before, then there's another malloc statement
845 for the same level, which means that we need to mark
846 it escaping. */
847 if (mi->malloc_for_level
848 && mi->max_malloced_level-1 == level
849 && mi->malloc_for_level[level] != stmt)
850 {
851 mark_min_matrix_escape_level (mi, level, stmt);
852 return;
853 }
854 else
855 add_allocation_site (mi, stmt, level);
856 return;
857 }
858 /* Looks like we don't know what is happening in this
859 statement, so be on the safe side and mark it as escaping. */
860 mark_min_matrix_escape_level (mi, level, stmt);
861 }
862
863 /* The transposing decision making.
864 In order to calculate the profitability of transposing, we collect two
865 types of information regarding the accesses:
866 1. profiling information used to express the hotness of an access, that
867 is how often the matrix is accessed by this access site (count of the
868 access site).
869 2. which dimension in the access site is iterated by the inner
870 most loop containing this access.
871
872 The matrix will have a calculated value of weighted hotness for each
873 dimension.
874 Intuitively the hotness level of a dimension is a function of how
875 many times it was the most frequently accessed dimension in the
876 highly executed access sites of this matrix.
877
878 As computed by the following equation:
879 m n
880 __ __
881 \ \ dim_hot_level[i] +=
882 /_ /_
883 j i
884 acc[j]->dim[i]->iter_by_inner_loop * count(j)
885
886 Where n is the number of dims and m is the number of the matrix
887 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
888 iterates over dim[i] in innermost loop, and is 0 otherwise.
889
890 The organization of the new matrix should be according to the
891 hotness of each dimension. The hotness of the dimension implies
892 the locality of the elements. */
893 static int
894 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
895 {
896 struct matrix_info *mi = (struct matrix_info *) *slot;
897 int min_escape_l = mi->min_indirect_level_escape;
898 struct loop *loop;
899 affine_iv iv;
900 struct access_site_info *acc_info;
901 int i;
902
903 if (min_escape_l < 2 || !mi->access_l)
904 {
905 if (mi->access_l)
906 {
907 for (i = 0;
908 VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
909 i++)
910 free (acc_info);
911 VEC_free (access_site_info_p, heap, mi->access_l);
912
913 }
914 return 1;
915 }
916 if (!mi->dim_hot_level)
917 mi->dim_hot_level =
918 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
919
920
921 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
922 i++)
923 {
924 if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
925 && acc_info->level < min_escape_l)
926 {
927 loop = loop_containing_stmt (acc_info->stmt);
928 if (!loop || loop->inner)
929 {
930 free (acc_info);
931 continue;
932 }
933 if (simple_iv (loop, loop, acc_info->offset, &iv, true))
934 {
935 if (iv.step != NULL)
936 {
937 HOST_WIDE_INT istep;
938
939 istep = int_cst_value (iv.step);
940 if (istep != 0)
941 {
942 acc_info->iterated_by_inner_most_loop_p = 1;
943 mi->dim_hot_level[acc_info->level] +=
944 gimple_bb (acc_info->stmt)->count;
945 }
946
947 }
948 }
949 }
950 free (acc_info);
951 }
952 VEC_free (access_site_info_p, heap, mi->access_l);
953
954 return 1;
955 }
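
As a worked sketch of the accumulation in analyze_transpose above (the numbers are hypothetical): for a matrix with two non-escaping levels, an access site at level 1 that sits in an innermost loop, whose offset is a simple induction variable with nonzero step, and whose basic block has count 900, adds 900 to dim_hot_level[1]; another access site at level 0 from a block with count 100 adds 100 to dim_hot_level[0]. The transposing decision in the transformation phase then orders the dimensions according to these weighted counts, as described in the comment before the function.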
956
957 /* Find the index which defines the OFFSET from base.
958 We walk from use to def until we find how the offset was defined. */
959 static tree
960 get_index_from_offset (tree offset, gimple def_stmt)
961 {
962 tree op1, op2, index;
963
964 if (gimple_code (def_stmt) == GIMPLE_PHI)
965 return NULL;
966 if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
967 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
968 return get_index_from_offset (offset,
969 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
970 else if (is_gimple_assign (def_stmt)
971 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
972 {
973 op1 = gimple_assign_rhs1 (def_stmt);
974 op2 = gimple_assign_rhs2 (def_stmt);
975 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
976 return NULL;
977 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
978 return index;
979 }
980 else
981 return NULL_TREE;
982 }
983
984 /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
985 of the type related to the SSA_VAR, or the type related to the
986 lhs of STMT, in the case that it is an INDIRECT_REF. */
987 static void
988 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
989 int current_indirect_level)
990 {
991 tree lhs;
992 HOST_WIDE_INT type_size;
993
994 /* Update type according to the type of the INDIRECT_REF expr. */
995 if (is_gimple_assign (stmt)
996 && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
997 {
998 lhs = gimple_assign_lhs (stmt);
999 gcc_assert (POINTER_TYPE_P
1000 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
1001 type_size =
1002 int_size_in_bytes (TREE_TYPE
1003 (TREE_TYPE
1004 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
1005 }
1006 else
1007 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
1008
1009 /* Record the size of elements accessed (as a whole)
1010 in the current indirection level (dimension). If the size of
1011 elements is not known at compile time, mark it as escaping. */
1012 if (type_size <= 0)
1013 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
1014 else
1015 {
1016 int l = current_indirect_level;
1017
1018 if (!mi->dimension_type_size)
1019 {
1020 mi->dimension_type_size
1021 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1022 mi->dimension_type_size_len = l + 1;
1023 }
1024 else if (mi->dimension_type_size_len < l + 1)
1025 {
1026 mi->dimension_type_size
1027 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1028 (l + 1) * sizeof (HOST_WIDE_INT));
1029 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1030 0, (l + 1 - mi->dimension_type_size_len)
1031 * sizeof (HOST_WIDE_INT));
1032 mi->dimension_type_size_len = l + 1;
1033 }
1034 /* Make sure all the accesses at the same level have the same
1035 type size. */
1036 if (!mi->dimension_type_size[l])
1037 mi->dimension_type_size[l] = type_size;
1038 else if (mi->dimension_type_size[l] != type_size)
1039 mark_min_matrix_escape_level (mi, l, stmt);
1040 }
1041 }
1042
1043 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1044 ssa var that we want to check because it came from some use of matrix
1045 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1046 far. */
1047
1048 static int
1049 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1050 gimple use_stmt, int current_indirect_level)
1051 {
1052 tree fndecl = gimple_call_fndecl (use_stmt);
1053
1054 if (gimple_call_lhs (use_stmt))
1055 {
1056 tree lhs = gimple_call_lhs (use_stmt);
1057 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1058
1059 memset (&lhs_acc, 0, sizeof (lhs_acc));
1060 memset (&rhs_acc, 0, sizeof (rhs_acc));
1061
1062 lhs_acc.ssa_var = ssa_var;
1063 lhs_acc.t_code = ERROR_MARK;
1064 ssa_accessed_in_tree (lhs, &lhs_acc);
1065 rhs_acc.ssa_var = ssa_var;
1066 rhs_acc.t_code = ERROR_MARK;
1067 ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1068
1069 /* The SSA name must appear on either the left side or the right side
1070 for us to understand what is happening.
1071 In case the SSA_NAME is found in both sides we should be escaping
1072 at this level because in this case we cannot calculate the
1073 address correctly. */
1074 if ((lhs_acc.var_found && rhs_acc.var_found
1075 && lhs_acc.t_code == INDIRECT_REF)
1076 || (!rhs_acc.var_found && !lhs_acc.var_found))
1077 {
1078 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1079 return current_indirect_level;
1080 }
1081 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1082
1083 /* If we are storing to the matrix at some level, then mark it as
1084 escaping at that level. */
1085 if (lhs_acc.var_found)
1086 {
1087 int l = current_indirect_level + 1;
1088
1089 gcc_assert (lhs_acc.t_code == INDIRECT_REF);
1090 mark_min_matrix_escape_level (mi, l, use_stmt);
1091 return current_indirect_level;
1092 }
1093 }
1094
1095 if (fndecl)
1096 {
1097 if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1098 {
1099 if (dump_file)
1100 fprintf (dump_file,
1101 "Matrix %s: Function call %s, level %d escapes.\n",
1102 get_name (mi->decl), get_name (fndecl),
1103 current_indirect_level);
1104 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1105 }
1106 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1107 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1108 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1109 else
1110 {
1111 /* Record the free statements so we can delete them
1112 later. */
1113 int l = current_indirect_level;
1114
1115 mi->free_stmts[l].stmt = use_stmt;
1116 mi->free_stmts[l].func = current_function_decl;
1117 }
1118 }
1119 return current_indirect_level;
1120 }
1121
1122 /* USE_STMT represents a phi node of the ssa var that we want to
1123 check because it came from some use of matrix
1124 MI.
1125 We check all the escaping levels that get to the PHI node
1126 and make sure they all escape at the same level;
1127 if not (which is rare) we let the escaping level be the
1128 minimum level that gets into that PHI because starting from
1129 that level we cannot expect the behavior of the indirections.
1130 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1131
1132 static void
1133 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1134 int current_indirect_level, sbitmap visited,
1135 bool record_accesses)
1136 {
1137
1138 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1139
1140 tmp_maphi.phi = use_stmt;
1141 if ((maphi = (struct matrix_access_phi_node *)
1142 htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1143 {
1144 if (maphi->indirection_level == current_indirect_level)
1145 return;
1146 else
1147 {
1148 int level = MIN (maphi->indirection_level,
1149 current_indirect_level);
1150 size_t j;
1151 gimple stmt = NULL;
1152
1153 maphi->indirection_level = level;
1154 for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1155 {
1156 tree def = PHI_ARG_DEF (use_stmt, j);
1157
1158 if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1159 stmt = SSA_NAME_DEF_STMT (def);
1160 }
1161 mark_min_matrix_escape_level (mi, level, stmt);
1162 }
1163 return;
1164 }
1165 maphi = (struct matrix_access_phi_node *)
1166 xcalloc (1, sizeof (struct matrix_access_phi_node));
1167 maphi->phi = use_stmt;
1168 maphi->indirection_level = current_indirect_level;
1169
1170 /* Insert to hash table. */
1171 pmaphi = (struct matrix_access_phi_node **)
1172 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1173 gcc_assert (pmaphi);
1174 *pmaphi = maphi;
1175
1176 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1177 {
1178 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1179 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1180 current_indirect_level, false, visited,
1181 record_accesses);
1182 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1183 }
1184 }
1185
1186 /* USE_STMT represents an assign statement (the rhs or lhs includes
1187 the ssa var that we want to check because it came from some use of matrix
1188 MI). CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1189
1190 static int
1191 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1192 gimple use_stmt, int current_indirect_level,
1193 bool last_op, sbitmap visited,
1194 bool record_accesses)
1195 {
1196 tree lhs = gimple_get_lhs (use_stmt);
1197 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1198
1199 memset (&lhs_acc, 0, sizeof (lhs_acc));
1200 memset (&rhs_acc, 0, sizeof (rhs_acc));
1201
1202 lhs_acc.ssa_var = ssa_var;
1203 lhs_acc.t_code = ERROR_MARK;
1204 ssa_accessed_in_tree (lhs, &lhs_acc);
1205 rhs_acc.ssa_var = ssa_var;
1206 rhs_acc.t_code = ERROR_MARK;
1207 ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1208
1209 /* The SSA name must appear on either the left side or the right side
1210 for us to understand what is happening.
1211 In case the SSA_NAME is found in both sides we should be escaping
1212 at this level because in this case we cannot calculate the
1213 address correctly. */
1214 if ((lhs_acc.var_found && rhs_acc.var_found
1215 && lhs_acc.t_code == INDIRECT_REF)
1216 || (!rhs_acc.var_found && !lhs_acc.var_found))
1217 {
1218 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1219 return current_indirect_level;
1220 }
1221 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1222
1223 /* If we are storing to the matrix at some level, then mark it as
1224 escaping at that level. */
1225 if (lhs_acc.var_found)
1226 {
1227 int l = current_indirect_level + 1;
1228
1229 gcc_assert (lhs_acc.t_code == INDIRECT_REF);
1230
1231 if (!(gimple_assign_copy_p (use_stmt)
1232 || gimple_assign_cast_p (use_stmt))
1233 || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1234 mark_min_matrix_escape_level (mi, l, use_stmt);
1235 else
1236 {
1237 gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1238 analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1239 if (record_accesses)
1240 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1241 NULL_TREE, l, true);
1242 update_type_size (mi, use_stmt, NULL, l);
1243 }
1244 return current_indirect_level;
1245 }
1246 /* Now, check the right-hand-side, to see how the SSA variable
1247 is used. */
1248 if (rhs_acc.var_found)
1249 {
1250 if (rhs_acc.t_code != INDIRECT_REF
1251 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1252 {
1253 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1254 return current_indirect_level;
1255 }
1256 /* If the access in the RHS has an indirection increase the
1257 indirection level. */
1258 if (rhs_acc.t_code == INDIRECT_REF)
1259 {
1260 if (record_accesses)
1261 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1262 NULL_TREE,
1263 current_indirect_level, true);
1264 current_indirect_level += 1;
1265 }
1266 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1267 {
1268 gcc_assert (rhs_acc.second_op);
1269 if (last_op)
1270 /* Currently we support only one PLUS expression on the
1271 SSA_NAME that holds the base address of the current
1272 indirection level; to support more general case there
1273 is a need to hold a stack of expressions and regenerate
1274 the calculation later. */
1275 mark_min_matrix_escape_level (mi, current_indirect_level,
1276 use_stmt);
1277 else
1278 {
1279 tree index;
1280 tree op1, op2;
1281
1282 op1 = gimple_assign_rhs1 (use_stmt);
1283 op2 = gimple_assign_rhs2 (use_stmt);
1284
1285 op2 = (op1 == ssa_var) ? op2 : op1;
1286 if (TREE_CODE (op2) == INTEGER_CST)
1287 index =
1288 build_int_cst (TREE_TYPE (op1),
1289 TREE_INT_CST_LOW (op2) /
1290 int_size_in_bytes (TREE_TYPE (op1)));
1291 else
1292 {
1293 index =
1294 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1295 if (index == NULL_TREE)
1296 {
1297 mark_min_matrix_escape_level (mi,
1298 current_indirect_level,
1299 use_stmt);
1300 return current_indirect_level;
1301 }
1302 }
1303 if (record_accesses)
1304 record_access_alloc_site_info (mi, use_stmt, op2,
1305 index,
1306 current_indirect_level, false);
1307 }
1308 }
1309 /* If we are storing this level of indirection mark it as
1310 escaping. */
1311 if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
1312 {
1313 int l = current_indirect_level;
1314
1315 /* One exception is when we are storing to the matrix
1316 variable itself; this is the malloc case, and we must make
1317 sure that it is the one and only call to malloc, so
1318 we call analyze_matrix_allocation_site to check
1319 this. */
1320 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1321 mark_min_matrix_escape_level (mi, current_indirect_level,
1322 use_stmt);
1323 else
1324 {
1325 /* Also update the escaping level. */
1326 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1327 if (record_accesses)
1328 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1329 NULL_TREE, l, true);
1330 }
1331 }
1332 else
1333 {
1334 /* We are placing it in an SSA, follow that SSA. */
1335 analyze_matrix_accesses (mi, lhs,
1336 current_indirect_level,
1337 rhs_acc.t_code == POINTER_PLUS_EXPR,
1338 visited, record_accesses);
1339 }
1340 }
1341 return current_indirect_level;
1342 }
1343
1344 /* Given an SSA_VAR (coming from a use statement of the matrix MI),
1345 follow its uses and level of indirection and find out the minimum
1346 indirection level it escapes in (the highest dimension) and the maximum
1347 level it is accessed in (this will be the actual dimension of the
1348 matrix). The information is accumulated in MI.
1349 We look at the immediate uses, if one escapes we finish; if not,
1350 we make a recursive call for each one of the immediate uses of the
1351 resulting SSA name. */
1352 static void
1353 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1354 int current_indirect_level, bool last_op,
1355 sbitmap visited, bool record_accesses)
1356 {
1357 imm_use_iterator imm_iter;
1358 use_operand_p use_p;
1359
1360 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1361 current_indirect_level);
1362
1363 /* We don't go beyond the escaping level when we are performing the
1364 flattening. NOTE: we keep the last indirection level that doesn't
1365 escape. */
1366 if (mi->min_indirect_level_escape > -1
1367 && mi->min_indirect_level_escape <= current_indirect_level)
1368 return;
1369
1370 /* Now go over the uses of the SSA_NAME and check how it is used in
1371 each one of them. We are mainly looking for the pattern INDIRECT_REF,
1372 then a POINTER_PLUS_EXPR, then INDIRECT_REF etc. while in between there could
1373 be any number of copies and casts. */
1374 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1375
1376 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1377 {
1378 gimple use_stmt = USE_STMT (use_p);
1379 if (gimple_code (use_stmt) == GIMPLE_PHI)
1380 /* We check all the escaping levels that get to the PHI node
1381 and make sure they all escape at the same level;
1382 if not (which is rare) we let the escaping level be the
1383 minimum level that gets into that PHI because starting from
1384 that level we cannot expect the behavior of the indirections. */
1385
1386 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1387 visited, record_accesses);
1388
1389 else if (is_gimple_call (use_stmt))
1390 analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1391 current_indirect_level);
1392 else if (is_gimple_assign (use_stmt))
1393 current_indirect_level =
1394 analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1395 current_indirect_level, last_op,
1396 visited, record_accesses);
1397 }
1398 }
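
To illustrate the pattern that analyze_matrix_accesses looks for (a hedged sketch with hypothetical SSA names; the byte offsets assume 8-byte pointers and 4-byte ints), a source access a[i][j] typically appears in GIMPLE as a chain like:

    T1 = a;               /* copy of the matrix decl                      */
    T2 = T1 + i * 8;      /* POINTER_PLUS_EXPR; offset in bytes, level 0  */
    T3 = *T2;             /* INDIRECT_REF; indirection level becomes 1    */
    T4 = T3 + j * 4;      /* POINTER_PLUS_EXPR at level 1                 */
    T5 = *T4;             /* INDIRECT_REF; the element itself             */

with any number of copies and casts interleaved, exactly as the comment inside the function describes.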
1399
1400 typedef struct
1401 {
1402 tree fn;
1403 gimple stmt;
1404 } check_var_data;
1405
1406 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1407 the malloc size expression and check that those aren't changed
1408 over the function. */
1409 static tree
1410 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1411 {
1412 basic_block bb;
1413 tree t = *tp;
1414 check_var_data *callback_data = (check_var_data*) data;
1415 tree fn = callback_data->fn;
1416 gimple_stmt_iterator gsi;
1417 gimple stmt;
1418
1419 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1420 return NULL_TREE;
1421
1422 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1423 {
1424 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1425 {
1426 stmt = gsi_stmt (gsi);
1427 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1428 continue;
1429 if (gimple_get_lhs (stmt) == t)
1430 {
1431 callback_data->stmt = stmt;
1432 return t;
1433 }
1434 }
1435 }
1436 *walk_subtrees = 1;
1437 return NULL_TREE;
1438 }
1439
1440 /* Go backwards in the use-def chains and find out the expression
1441 represented by the possible SSA name in STMT, until it is composed
1442 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1443 we make sure that all the arguments represent the same subexpression,
1444 otherwise we fail. */
1445
1446 static tree
1447 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1448 {
1449 tree op1, op2, res;
1450 enum tree_code code;
1451
1452 switch (gimple_code (stmt))
1453 {
1454 case GIMPLE_ASSIGN:
1455 code = gimple_assign_rhs_code (stmt);
1456 op1 = gimple_assign_rhs1 (stmt);
1457
1458 switch (code)
1459 {
1460 case POINTER_PLUS_EXPR:
1461 case PLUS_EXPR:
1462 case MINUS_EXPR:
1463 case MULT_EXPR:
1464
1465 op2 = gimple_assign_rhs2 (stmt);
1466 op1 = can_calculate_expr_before_stmt (op1, visited);
1467 if (!op1)
1468 return NULL_TREE;
1469 op2 = can_calculate_expr_before_stmt (op2, visited);
1470 if (op2)
1471 return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1472 return NULL_TREE;
1473
1474 CASE_CONVERT:
1475 res = can_calculate_expr_before_stmt (op1, visited);
1476 if (res != NULL_TREE)
1477 return build1 (code, gimple_expr_type (stmt), res);
1478 else
1479 return NULL_TREE;
1480
1481 default:
1482 if (gimple_assign_single_p (stmt))
1483 return can_calculate_expr_before_stmt (op1, visited);
1484 else
1485 return NULL_TREE;
1486 }
1487
1488 case GIMPLE_PHI:
1489 {
1490 size_t j;
1491
1492 res = NULL_TREE;
1493 /* Make sure all the arguments represent the same value. */
1494 for (j = 0; j < gimple_phi_num_args (stmt); j++)
1495 {
1496 tree new_res;
1497 tree def = PHI_ARG_DEF (stmt, j);
1498
1499 new_res = can_calculate_expr_before_stmt (def, visited);
1500 if (res == NULL_TREE)
1501 res = new_res;
1502 else if (!new_res || !expressions_equal_p (res, new_res))
1503 return NULL_TREE;
1504 }
1505 return res;
1506 }
1507
1508 default:
1509 return NULL_TREE;
1510 }
1511 }
1512
1513 /* Go backwards in the use-def chains and find out the expression
1514 represented by the possible SSA name in EXPR, until it is composed
1515 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1516 we make sure that all the arguments represent the same subexpression,
1517 otherwise we fail. */
1518 static tree
1519 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1520 {
1521 gimple def_stmt;
1522 tree res;
1523
1524 switch (TREE_CODE (expr))
1525 {
1526 case SSA_NAME:
1527 /* In the case of a loop, we don't know how to represent this expression. */
1528 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1529 return NULL_TREE;
1530
1531 SET_BIT (visited, SSA_NAME_VERSION (expr));
1532 def_stmt = SSA_NAME_DEF_STMT (expr);
1533 res = can_calculate_stmt_before_stmt (def_stmt, visited);
1534 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1535 return res;
1536 case VAR_DECL:
1537 case PARM_DECL:
1538 case INTEGER_CST:
1539 return expr;
1540
1541 default:
1542 return NULL_TREE;
1543 }
1544 }
1545
1546 /* There should be only one allocation function for the dimensions
1547 that don't escape. Here we check the allocation sites in this
1548 function. We must make sure that all the dimensions are allocated
1549 using malloc and that the malloc size parameter expression can be
1550 pre-calculated before the call to the malloc of dimension 0.
1551
1552 Given a candidate matrix for flattening -- MI -- check if it's
1553 appropriate for flattening -- we analyze the allocation
1554 sites that we recorded in the previous analysis. The result of the
1555 analysis is a level of indirection (matrix dimension) in which the
1556 flattening is safe. We check the following conditions:
1557 1. There is only one allocation site for each dimension.
1558 2. The allocation sites of all the dimensions are in the same
1559 function.
1560 (The above two are being taken care of during the analysis when
1561 we check the allocation site).
1562 3. All the dimensions that we flatten are allocated at once; thus
1563 the total size must be known before the allocation of the
1564 dimension 0 (top level) -- we must make sure we represent the
1565 size of the allocation as an expression of global parameters or
1566 constants and that those do not change over the function. */
1567
1568 static int
1569 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1570 {
1571 int level;
1572 gimple_stmt_iterator gsi;
1573 basic_block bb_level_0;
1574 struct matrix_info *mi = (struct matrix_info *) *slot;
1575 sbitmap visited;
1576
1577 if (!mi->malloc_for_level)
1578 return 1;
1579
1580 visited = sbitmap_alloc (num_ssa_names);
1581
1582 /* Do nothing if the current function is not the allocation
1583 function of MI. */
1584 if (mi->allocation_function_decl != current_function_decl
1585 /* We aren't in the main allocation function yet. */
1586 || !mi->malloc_for_level[0])
1587 return 1;
1588
1589 for (level = 1; level < mi->max_malloced_level; level++)
1590 if (!mi->malloc_for_level[level])
1591 break;
1592
1593 mark_min_matrix_escape_level (mi, level, NULL);
1594
1595 gsi = gsi_for_stmt (mi->malloc_for_level[0]);
1596 bb_level_0 = gsi.bb;
1597
1598 /* Check if the expression of the size passed to malloc could be
1599 pre-calculated before the malloc of level 0. */
1600 for (level = 1; level < mi->min_indirect_level_escape; level++)
1601 {
1602 gimple call_stmt;
1603 tree size;
1604 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1605
1606 call_stmt = mi->malloc_for_level[level];
1607
1608 /* Find the correct malloc information. */
1609 collect_data_for_malloc_call (call_stmt, &mcd);
1610
1611 /* No need to check anticipation for constants. */
1612 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1613 {
1614 if (!mi->dimension_size)
1615 {
1616 mi->dimension_size =
1617 (tree *) xcalloc (mi->min_indirect_level_escape,
1618 sizeof (tree));
1619 mi->dimension_size_orig =
1620 (tree *) xcalloc (mi->min_indirect_level_escape,
1621 sizeof (tree));
1622 }
1623 mi->dimension_size[level] = mcd.size_var;
1624 mi->dimension_size_orig[level] = mcd.size_var;
1625 continue;
1626 }
1627 /* ??? Here we should also add the way to calculate the size
1628 expression not only know that it is anticipated. */
1629 sbitmap_zero (visited);
1630 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1631 if (size == NULL_TREE)
1632 {
1633 mark_min_matrix_escape_level (mi, level, call_stmt);
1634 if (dump_file)
1635 fprintf (dump_file,
1636 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1637 get_name (mi->decl), level);
1638 break;
1639 }
1640 if (!mi->dimension_size)
1641 {
1642 mi->dimension_size =
1643 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1644 mi->dimension_size_orig =
1645 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1646 }
1647 mi->dimension_size[level] = size;
1648 mi->dimension_size_orig[level] = size;
1649 }
1650
1651 /* We don't need those anymore. */
1652 for (level = mi->min_indirect_level_escape;
1653 level < mi->max_malloced_level; level++)
1654 mi->malloc_for_level[level] = NULL;
1655 return 1;
1656 }
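
As an illustrative example of condition 3 checked above (hypothetical code, not from this file): the allocation

    a = (int **) malloc (dim1 * sizeof (int *));
    for (i = 0; i < dim1; i++)
      a[i] = (int *) malloc (dim2 * sizeof (int));

is acceptable when dim1 and dim2 are globals or parameters that the function does not modify, because the inner size dim2 * sizeof (int) can be represented before the level-0 malloc; if the inner malloc size instead depended on a value computed inside the loop, can_calculate_expr_before_stmt would return NULL_TREE and the matrix would be marked as escaping at that level.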
1657
1658 /* Track all access and allocation sites. */
1659 static void
1660 find_sites_in_func (bool record)
1661 {
1662 sbitmap visited_stmts_1;
1663
1664 gimple_stmt_iterator gsi;
1665 gimple stmt;
1666 basic_block bb;
1667 struct matrix_info tmpmi, *mi;
1668
1669 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1670
1671 FOR_EACH_BB (bb)
1672 {
1673 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1674 {
1675 tree lhs;
1676
1677 stmt = gsi_stmt (gsi);
1678 lhs = gimple_get_lhs (stmt);
1679 if (lhs != NULL_TREE
1680 && TREE_CODE (lhs) == VAR_DECL)
1681 {
1682 tmpmi.decl = lhs;
1683 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1684 &tmpmi)))
1685 {
1686 sbitmap_zero (visited_stmts_1);
1687 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1688 }
1689 }
1690 if (is_gimple_assign (stmt)
1691 && gimple_assign_single_p (stmt)
1692 && TREE_CODE (lhs) == SSA_NAME
1693 && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1694 {
1695 tmpmi.decl = gimple_assign_rhs1 (stmt);
1696 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1697 &tmpmi)))
1698 {
1699 sbitmap_zero (visited_stmts_1);
1700 analyze_matrix_accesses (mi, lhs, 0,
1701 false, visited_stmts_1, record);
1702 }
1703 }
1704 }
1705 }
1706 sbitmap_free (visited_stmts_1);
1707 }
1708
1709 /* Traverse the use-def chains to see if there are matrices that
1710 are passed through pointers and we cannot know how they are accessed.
1711 For each SSA-name defined by a global variable of our interest,
1712 we traverse the use-def chains of the SSA and follow the indirections,
1713 and record in what level of indirection the use of the variable
1714 escapes. A use of a pointer escapes when it is passed to a function,
1715 stored into memory or assigned (except in malloc and free calls). */
1716
1717 static void
1718 record_all_accesses_in_func (void)
1719 {
1720 unsigned i;
1721 sbitmap visited_stmts_1;
1722
1723 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1724
1725 for (i = 0; i < num_ssa_names; i++)
1726 {
1727 struct matrix_info tmpmi, *mi;
1728 tree ssa_var = ssa_name (i);
1729 tree rhs, lhs;
1730
1731 if (!ssa_var
1732 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1733 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1734 continue;
1735 rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1736 lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1737 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1738 continue;
1739
1740 /* If the RHS is a matrix that we want to analyze, follow the def-use
1741 chain for this SSA_VAR and check for escapes or apply the
1742 flattening. */
1743 tmpmi.decl = rhs;
1744 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1745 {
1746 /* This variable will track the visited PHI nodes, so we can limit
1747 its size to the maximum number of SSA names. */
1748 sbitmap_zero (visited_stmts_1);
1749 analyze_matrix_accesses (mi, ssa_var,
1750 0, false, visited_stmts_1, true);
1751
1752 }
1753 }
1754 sbitmap_free (visited_stmts_1);
1755 }
1756
1757 /* Used when we want to convert the expression: RESULT = something *
1758 ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are powers
1759 of 2, shift operations can be used; otherwise division and
1760 multiplication are needed. */
1761
1762 static tree
1763 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1764 {
1765
1766 int x, y;
1767 tree result1, ratio, log, orig_tree, new_tree;
1768
1769 x = exact_log2 (orig);
1770 y = exact_log2 (new_val);
1771
1772 if (x != -1 && y != -1)
1773 {
1774 if (x == y)
1775 return result;
1776 else if (x > y)
1777 {
1778 log = build_int_cst (TREE_TYPE (result), x - y);
1779 result1 =
1780 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1781 return result1;
1782 }
1783 log = build_int_cst (TREE_TYPE (result), y - x);
1784 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1785
1786 return result1;
1787 }
1788 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1789 new_tree = build_int_cst (TREE_TYPE (result), new_val);
1790 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1791 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1792
1793 return result1;
1794 }
1795
1796
1797 /* We know that we are allowed to perform matrix flattening (according to the
1798 escape analysis), so we traverse the def-use chains of the SSA vars
1799 defined by the global variables pointing to the matrices of our interest.
1800 In each use of such an SSA var we calculate the offset from the base address
1801 according to the following equation:
1802
1803 a[I1][I2]...[Ik], where D1..Dk are the lengths of the dimensions, the
1804 escaping level is m <= k, and a' is the newly allocated matrix,
1805 is translated into:
1806
1807 b[I(m+1)]...[Ik]
1808
1809 where
1810 b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im
1811 */
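/* As a concrete illustration (hypothetical code, not emitted literally by
   the pass): for a fully flattened three-dimensional matrix with dimension
   lengths D1, D2 and D3, an access

     a[i][j][k]

   becomes an access at the single flat offset

     a'[(i * D2 + j) * D3 + k]

   which is the same value as i*D2*D3 + j*D3 + k given by the formula
   above.  */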
1812
1813 static int
1814 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1815 {
1816 gimple_stmt_iterator gsi;
1817 struct matrix_info *mi = (struct matrix_info *) *slot;
1818 int min_escape_l = mi->min_indirect_level_escape;
1819 struct access_site_info *acc_info;
1820 enum tree_code code;
1821 int i;
1822
1823 if (min_escape_l < 2 || !mi->access_l)
1824 return 1;
1825 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1826 i++)
1827 {
1828 /* This is possible because we collect the access sites before
1829 we determine the final minimum indirection level. */
1830 if (acc_info->level >= min_escape_l)
1831 {
1832 free (acc_info);
1833 continue;
1834 }
1835 if (acc_info->is_alloc)
1836 {
1837 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1838 {
1839 ssa_op_iter iter;
1840 tree def;
1841 gimple stmt = acc_info->stmt;
1842 tree lhs;
1843
1844 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1845 mark_sym_for_renaming (SSA_NAME_VAR (def));
1846 gsi = gsi_for_stmt (stmt);
1847 gcc_assert (is_gimple_assign (acc_info->stmt));
1848 lhs = gimple_assign_lhs (acc_info->stmt);
1849 if (TREE_CODE (lhs) == SSA_NAME
1850 && acc_info->level < min_escape_l - 1)
1851 {
1852 imm_use_iterator imm_iter;
1853 use_operand_p use_p;
1854 gimple use_stmt;
1855
1856 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1857 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1858 {
1859 tree rhs, tmp;
1860 gimple new_stmt;
1861
1862 gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1863 == INDIRECT_REF);
1864 /* Emit a conversion statement to convert to the type of the use. */
1865 tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1866 add_referenced_var (tmp);
1867 rhs = gimple_assign_rhs1 (acc_info->stmt);
1868 new_stmt = gimple_build_assign (tmp,
1869 TREE_OPERAND (rhs, 0));
1870 tmp = make_ssa_name (tmp, new_stmt);
1871 gimple_assign_set_lhs (new_stmt, tmp);
1872 gsi = gsi_for_stmt (acc_info->stmt);
1873 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1874 SET_USE (use_p, tmp);
1875 }
1876 }
1877 if (acc_info->level < min_escape_l - 1)
1878 gsi_remove (&gsi, true);
1879 }
1880 free (acc_info);
1881 continue;
1882 }
1883 code = gimple_assign_rhs_code (acc_info->stmt);
1884 if (code == INDIRECT_REF
1885 && acc_info->level < min_escape_l - 1)
1886 {
1887 /* Replace the INDIRECT_REF with a NOP (cast); usually we are casting
1888 from "pointer to type" to "type". */
1889 tree t =
1890 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1891 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1892 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1893 gimple_assign_set_rhs1 (acc_info->stmt, t);
1894 }
1895 else if (code == POINTER_PLUS_EXPR
1896 && acc_info->level < (min_escape_l))
1897 {
1898 imm_use_iterator imm_iter;
1899 use_operand_p use_p;
1900
1901 tree offset;
1902 int k = acc_info->level;
1903 tree num_elements, total_elements;
1904 tree tmp1;
1905 tree d_size = mi->dimension_size[k];
1906
1907 /* We have already made sure in the analysis that the first operand
1908 is the base and the second is the offset. */
1909 offset = acc_info->offset;
1910 if (mi->dim_map[k] == min_escape_l - 1)
1911 {
1912 if (!check_transpose_p || mi->is_transposed_p == false)
1913 tmp1 = offset;
1914 else
1915 {
1916 tree new_offset;
1917 tree d_type_size, d_type_size_k;
1918
1919 d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
1920 d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
1921
1922 new_offset =
1923 compute_offset (mi->dimension_type_size[min_escape_l],
1924 mi->dimension_type_size[k + 1], offset);
1925
1926 total_elements = new_offset;
1927 if (new_offset != offset)
1928 {
1929 gsi = gsi_for_stmt (acc_info->stmt);
1930 tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1931 true, NULL,
1932 true, GSI_SAME_STMT);
1933 }
1934 else
1935 tmp1 = offset;
1936 }
1937 }
1938 else
1939 {
1940 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1941 num_elements =
1942 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1943 fold_convert (sizetype, d_size));
1944 add_referenced_var (d_size);
1945 gsi = gsi_for_stmt (acc_info->stmt);
1946 tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1947 NULL, true, GSI_SAME_STMT);
1948 }
1949 /* Replace the offset if needed. */
1950 if (tmp1 != offset)
1951 {
1952 if (TREE_CODE (offset) == SSA_NAME)
1953 {
1954 gimple use_stmt;
1955
1956 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1957 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1958 if (use_stmt == acc_info->stmt)
1959 SET_USE (use_p, tmp1);
1960 }
1961 else
1962 {
1963 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1964 gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1965 update_stmt (acc_info->stmt);
1966 }
1967 }
1968 }
1969 /* ??? meanwhile this happens because we record the same access
1970 site more than once; we should be using a hash table to
1971 avoid this and insert the STMT of the access site only
1972 once.
1973 else
1974 gcc_unreachable (); */
1975 free (acc_info);
1976 }
1977 VEC_free (access_site_info_p, heap, mi->access_l);
1978
1979 update_ssa (TODO_update_ssa);
1980 #ifdef ENABLE_CHECKING
1981 verify_ssa (true);
1982 #endif
1983 return 1;
1984 }
1985
1986 /* Sort the array A of counts in ascending order. Arrange DIM_MAP to reflect the new order. */
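/* For example (hypothetical profile counts): with A = {10, 500, 50} and
   DIM_MAP = {0, 1, 2} on entry, the sort leaves A = {10, 50, 500} and
   DIM_MAP = {0, 2, 1}; each DIM_MAP entry records which original dimension
   now occupies that position.  */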
1987
1988 static void
1989 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1990 {
1991 int i, j, tmp1;
1992 gcov_type tmp;
1993
1994 for (i = 0; i < n - 1; i++)
1995 {
1996 for (j = 0; j < n - 1 - i; j++)
1997 {
1998 if (a[j + 1] < a[j])
1999 {
2000 tmp = a[j]; /* swap a[j] and a[j+1] */
2001 a[j] = a[j + 1];
2002 a[j + 1] = tmp;
2003 tmp1 = dim_map[j];
2004 dim_map[j] = dim_map[j + 1];
2005 dim_map[j + 1] = tmp1;
2006 }
2007 }
2008 }
2009 }
2010
2011 /* Replace the multiple mallocs (one for each dimension) with one malloc
2012 of size DIM1*DIM2*...*DIMN*size_of_element.
2013 Make sure that we hold the size used at the malloc site in a
2014 new global variable; this way we ensure that the size doesn't
2015 change and that it is accessible from all the other functions that
2016 use the matrix. Also, the original calls to free are deleted
2017 and replaced by a new call that frees the flattened matrix. */
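/* A hypothetical sketch of the intended result (identifiers invented for
   illustration): for a matrix whose three dimensions are all flattened,
   the nested allocation collapses into one call whose size is kept in a
   new file-scope variable, and only one call to free remains:

     static size_t a_total_size;

     a_total_size = dim1 * dim2 * dim3 * sizeof (int);
     a = malloc (a_total_size);
     ...
     free (a);
*/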
2018
2019 static int
2020 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
2021 {
2022 int i;
2023 struct matrix_info *mi;
2024 tree type, oldfn, prev_dim_size;
2025 gimple call_stmt_0, use_stmt;
2026 struct cgraph_node *c_node;
2027 struct cgraph_edge *e;
2028 gimple_stmt_iterator gsi;
2029 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2030 HOST_WIDE_INT element_size;
2031
2032 imm_use_iterator imm_iter;
2033 use_operand_p use_p;
2034 tree old_size_0, tmp;
2035 int min_escape_l;
2036 int id;
2037
2038 mi = (struct matrix_info *) *slot;
2039
2040 min_escape_l = mi->min_indirect_level_escape;
2041
2042 if (!mi->malloc_for_level)
2043 mi->min_indirect_level_escape = 0;
2044
2045 if (mi->min_indirect_level_escape < 2)
2046 return 1;
2047
2048 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2049 for (i = 0; i < mi->min_indirect_level_escape; i++)
2050 mi->dim_map[i] = i;
2051 if (check_transpose_p)
2052 {
2053 int i;
2054
2055 if (dump_file)
2056 {
2057 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2058 for (i = 0; i < min_escape_l; i++)
2059 {
2060 fprintf (dump_file, "dim %d before sort ", i);
2061 if (mi->dim_hot_level)
2062 fprintf (dump_file,
2063 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2064 mi->dim_hot_level[i]);
2065 }
2066 }
2067 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2068 mi->min_indirect_level_escape);
2069 if (dump_file)
2070 for (i = 0; i < min_escape_l; i++)
2071 {
2072 fprintf (dump_file, "dim %d after sort\n", i);
2073 if (mi->dim_hot_level)
2074 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
2075 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2076 }
2077 for (i = 0; i < mi->min_indirect_level_escape; i++)
2078 {
2079 if (dump_file)
2080 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2081 mi->dim_map[i]);
2082 if (mi->dim_map[i] != i)
2083 {
2084 if (dump_file)
2085 fprintf (dump_file,
2086 "Transposed dimensions: dim %d is now dim %d\n",
2087 mi->dim_map[i], i);
2088 mi->is_transposed_p = true;
2089 }
2090 }
2091 }
2092 else
2093 {
2094 for (i = 0; i < mi->min_indirect_level_escape; i++)
2095 mi->dim_map[i] = i;
2096 }
2097 /* Call statement of allocation site of level 0. */
2098 call_stmt_0 = mi->malloc_for_level[0];
2099
2100 /* Finds the correct malloc information. */
2101 collect_data_for_malloc_call (call_stmt_0, &mcd);
2102
2103 mi->dimension_size[0] = mcd.size_var;
2104 mi->dimension_size_orig[0] = mcd.size_var;
2105 /* Make sure that the variables in the size expression for
2106 all the dimensions (above level 0) aren't modified in
2107 the allocation function. */
2108 for (i = 1; i < mi->min_indirect_level_escape; i++)
2109 {
2110 tree t;
2111 check_var_data data;
2112
2113 /* mi->dimension_size must contain the expression of the size calculated
2114 in check_allocation_function. */
2115 gcc_assert (mi->dimension_size[i]);
2116
2117 data.fn = mi->allocation_function_decl;
2118 data.stmt = NULL;
2119 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2120 check_var_notmodified_p,
2121 &data);
2122 if (t != NULL_TREE)
2123 {
2124 mark_min_matrix_escape_level (mi, i, data.stmt);
2125 break;
2126 }
2127 }
2128
2129 if (mi->min_indirect_level_escape < 2)
2130 return 1;
2131
2132 /* Make sure that the size expression is available
2133 before the call to malloc of level 0. */
2134 gsi = gsi_for_stmt (call_stmt_0);
2135
2136 /* Find out the size of each dimension by looking at the malloc
2137 sites and create a global variable to hold it.
2138 We add the assignment to the global before the malloc of level 0. */
2139
2140 /* To be able to produce gimple temporaries. */
2141 oldfn = current_function_decl;
2142 current_function_decl = mi->allocation_function_decl;
2143 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2144
2145 /* Set the dimension sizes as follows:
2146 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i],
2147 where n is the maximum non-escaping level. */
2148 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2149 prev_dim_size = NULL_TREE;
2150
2151 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2152 {
2153 tree dim_size, dim_var;
2154 gimple stmt;
2155 tree d_type_size;
2156
2157 /* Now put the size expression in a global variable and initialize it to
2158 the size expression before the malloc of level 0. */
2159 dim_var =
2160 add_new_static_var (TREE_TYPE
2161 (mi->dimension_size_orig[mi->dim_map[i]]));
2162 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2163
2164 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2165 /* Find which dim ID becomes dim I. */
2166 for (id = 0; id < mi->min_indirect_level_escape; id++)
2167 if (mi->dim_map[id] == i)
2168 break;
2169 d_type_size =
2170 build_int_cst (type, mi->dimension_type_size[id + 1]);
2171 if (!prev_dim_size)
2172 prev_dim_size = build_int_cst (type, element_size);
2173 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2174 {
2175 dim_size = mi->dimension_size_orig[id];
2176 }
2177 else
2178 {
2179 dim_size =
2180 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2181 d_type_size);
2182
2183 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2184 }
2185 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2186 true, GSI_SAME_STMT);
2187 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2188 stmt = gimple_build_assign (dim_var, dim_size);
2189 mark_symbols_for_renaming (stmt);
2190 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2191
2192 prev_dim_size = mi->dimension_size[i] = dim_var;
2193 }
2194 update_ssa (TODO_update_ssa);
2195 /* Replace the malloc size argument in the malloc of level 0 with
2196 the total size of all the dimensions. */
2197 c_node = cgraph_node (mi->allocation_function_decl);
2198 old_size_0 = gimple_call_arg (call_stmt_0, 0);
2199 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2200 NULL, true, GSI_SAME_STMT);
2201 if (TREE_CODE (old_size_0) == SSA_NAME)
2202 {
2203 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2204 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2205 if (use_stmt == call_stmt_0)
2206 SET_USE (use_p, tmp);
2207 }
2208 /* When deleting the calls to malloc we also need to remove the edge from
2209 the call graph to keep it consistent. Notice that cgraph_edge may
2210 create a new node in the call graph if there is no node for the given
2211 declaration; this shouldn't be the case, but currently there is no way to
2212 check this outside of "cgraph.c". */
2213 for (i = 1; i < mi->min_indirect_level_escape; i++)
2214 {
2215 gimple_stmt_iterator gsi;
2216 gimple use_stmt1 = NULL;
2217
2218 gimple call_stmt = mi->malloc_for_level[i];
2219 gcc_assert (is_gimple_call (call_stmt));
2220 e = cgraph_edge (c_node, call_stmt);
2221 gcc_assert (e);
2222 cgraph_remove_edge (e);
2223 gsi = gsi_for_stmt (call_stmt);
2224 /* Remove the call stmt. */
2225 gsi_remove (&gsi, true);
2226 /* Remove the type cast stmt. */
2227 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2228 gimple_call_lhs (call_stmt))
2229 {
2230 use_stmt1 = use_stmt;
2231 gsi = gsi_for_stmt (use_stmt);
2232 gsi_remove (&gsi, true);
2233 }
2234 /* Remove the assignment of the allocated area. */
2235 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2236 gimple_get_lhs (use_stmt1))
2237 {
2238 gsi = gsi_for_stmt (use_stmt);
2239 gsi_remove (&gsi, true);
2240 }
2241 }
2242 update_ssa (TODO_update_ssa);
2243 #ifdef ENABLE_CHECKING
2244 verify_ssa (true);
2245 #endif
2246 /* Delete the calls to free. */
2247 for (i = 1; i < mi->min_indirect_level_escape; i++)
2248 {
2249 gimple_stmt_iterator gsi;
2250
2251 /* ??? It is not clear why this case is possible, but we have hit it once. */
2252 if (!mi->free_stmts[i].stmt)
2253 continue;
2254
2255 c_node = cgraph_node (mi->free_stmts[i].func);
2256 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2257 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2258 gcc_assert (e);
2259 cgraph_remove_edge (e);
2260 current_function_decl = mi->free_stmts[i].func;
2261 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2262 gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2263 gsi_remove (&gsi, true);
2264 }
2265 /* Return to the previous situation. */
2266 current_function_decl = oldfn;
2267 pop_cfun ();
2268 return 1;
2269
2270 }
2271
2272
2273 /* Print out the results of the escape analysis. */
2274 static int
2275 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2276 {
2277 struct matrix_info *mi = (struct matrix_info *) *slot;
2278
2279 if (!dump_file)
2280 return 1;
2281 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2282 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2283 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2284 fprintf (dump_file, "\n");
2285 if (mi->min_indirect_level_escape >= 2)
2286 fprintf (dump_file, "Flattened %d dimensions \n",
2287 mi->min_indirect_level_escape);
2288 return 1;
2289 }
2290
2291 /* Perform matrix flattening. */
2292
2293 static unsigned int
2294 matrix_reorg (void)
2295 {
2296 struct cgraph_node *node;
2297
2298 if (profile_info)
2299 check_transpose_p = true;
2300 else
2301 check_transpose_p = false;
2302 /* If there are hand-written vectors, we skip this optimization. */
2303 for (node = cgraph_nodes; node; node = node->next)
2304 if (!may_flatten_matrices (node))
2305 return 0;
2306 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2307 /* Find and record all potential matrices in the program. */
2308 find_matrices_decl ();
2309 /* Analyze the accesses of the matrices (escaping analysis). */
2310 for (node = cgraph_nodes; node; node = node->next)
2311 if (node->analyzed)
2312 {
2313 tree temp_fn;
2314
2315 temp_fn = current_function_decl;
2316 current_function_decl = node->decl;
2317 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2318 bitmap_obstack_initialize (NULL);
2319 gimple_register_cfg_hooks ();
2320
2321 if (!gimple_in_ssa_p (cfun))
2322 {
2323 free_dominance_info (CDI_DOMINATORS);
2324 free_dominance_info (CDI_POST_DOMINATORS);
2325 pop_cfun ();
2326 current_function_decl = temp_fn;
2327 bitmap_obstack_release (NULL);
2328
2329 return 0;
2330 }
2331
2332 #ifdef ENABLE_CHECKING
2333 verify_flow_info ();
2334 #endif
2335
2336 if (!matrices_to_reorg)
2337 {
2338 free_dominance_info (CDI_DOMINATORS);
2339 free_dominance_info (CDI_POST_DOMINATORS);
2340 pop_cfun ();
2341 current_function_decl = temp_fn;
2342 bitmap_obstack_release (NULL);
2343
2344 return 0;
2345 }
2346
2347 /* Create a hash table for phi nodes. */
2348 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2349 mat_acc_phi_eq, free);
2350 if (!check_transpose_p)
2351 find_sites_in_func (false);
2352 else
2353 {
2354 find_sites_in_func (true);
2355 loop_optimizer_init (LOOPS_NORMAL);
2356 if (current_loops)
2357 scev_initialize ();
2358 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2359 if (current_loops)
2360 {
2361 scev_finalize ();
2362 loop_optimizer_finalize ();
2363 current_loops = NULL;
2364 }
2365 }
2366 /* If the current function is the allocation function for any of
2367 the matrices, we check its allocation and the escaping level. */
2368 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2369 free_dominance_info (CDI_DOMINATORS);
2370 free_dominance_info (CDI_POST_DOMINATORS);
2371 pop_cfun ();
2372 current_function_decl = temp_fn;
2373 bitmap_obstack_release (NULL);
2374 }
2375 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2376 /* Now transform the accesses. */
2377 for (node = cgraph_nodes; node; node = node->next)
2378 if (node->analyzed)
2379 {
2380 /* Remember that allocation sites have been handled. */
2381 tree temp_fn;
2382
2383 temp_fn = current_function_decl;
2384 current_function_decl = node->decl;
2385 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2386 bitmap_obstack_initialize (NULL);
2387 gimple_register_cfg_hooks ();
2388 record_all_accesses_in_func ();
2389 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2390 free_dominance_info (CDI_DOMINATORS);
2391 free_dominance_info (CDI_POST_DOMINATORS);
2392 pop_cfun ();
2393 current_function_decl = temp_fn;
2394 bitmap_obstack_release (NULL);
2395 }
2396 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2397
2398 current_function_decl = NULL;
2399 set_cfun (NULL);
2400 matrices_to_reorg = NULL;
2401 return 0;
2402 }
2403
2404
2405 /* The condition for matrix flattening to be performed. */
2406 static bool
2407 gate_matrix_reorg (void)
2408 {
2409 return flag_ipa_matrix_reorg && flag_whole_program;
2410 }
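/* Both flags correspond to command-line options, so in practice the pass
   is only expected to run for an invocation along the lines of
   "gcc -O2 -fipa-matrix-reorg -fwhole-program file.c" (an illustrative
   command line, not the only possible one).  */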
2411
2412 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2413 {
2414 {
2415 SIMPLE_IPA_PASS,
2416 "matrix-reorg", /* name */
2417 gate_matrix_reorg, /* gate */
2418 matrix_reorg, /* execute */
2419 NULL, /* sub */
2420 NULL, /* next */
2421 0, /* static_pass_number */
2422 0, /* tv_id */
2423 0, /* properties_required */
2424 PROP_trees, /* properties_provided */
2425 0, /* properties_destroyed */
2426 0, /* todo_flags_start */
2427 TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */
2428 }
2429 };
2430