comparison gcc/tree-ssa-loop-prefetch.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
comparing 68:561a7518be6b with 111:04ced10e8804
@@ -1,7 +1,7 @@
 /* Array prefetching.
-   Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2005-2017 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
    GCC is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
@@ -18,37 +18,39 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "basic-block.h"
-#include "output.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "optabs-query.h"
 #include "tree-pretty-print.h"
-#include "tree-flow.h"
-#include "tree-dump.h"
-#include "timevar.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
 #include "cfgloop.h"
-#include "tree-pass.h"
-#include "insn-config.h"
-#include "recog.h"
-#include "hashtab.h"
-#include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
-#include "diagnostic-core.h"
 #include "params.h"
 #include "langhooks.h"
 #include "tree-inline.h"
 #include "tree-data-ref.h"
-
-
-/* FIXME: Needed for optabs, but this should all be moved to a TBD interface
-   between the GIMPLE and RTL worlds.  */
-#include "expr.h"
-#include "optabs.h"
+#include "diagnostic-core.h"
+#include "dbgcnt.h"
 
 /* This pass inserts prefetch instructions to optimize cache usage during
    accesses to arrays in loops.  It processes loops sequentially and:
 
    1) Gathers all memory references in the single loop.
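
A minimal sketch of the transformation this pass performs, with a hypothetical
AHEAD distance; the real pass derives the distance from PREFETCH_LATENCY and
the loop body cost, and may combine prefetching with unrolling:

#define AHEAD 8   /* hypothetical prefetch distance, in iterations */

double
sum_array (const double *a, unsigned long n)
{
  double sum = 0.0;
  for (unsigned long i = 0; i < n; i++)
    {
      /* Roughly what -fprefetch-loop-arrays inserts: a read prefetch
         (rw = 0) with the default temporal locality (3).  Prefetching
         past the end of the array is harmless; prefetches do not fault.  */
      __builtin_prefetch (&a[i + AHEAD], 0, 3);
      sum += a[i];
    }
  return sum;
}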
@@ -187,14 +189,10 @@
 
 #ifndef ACCEPTABLE_MISS_RATE
 #define ACCEPTABLE_MISS_RATE 50
 #endif
 
-#ifndef HAVE_prefetch
-#define HAVE_prefetch 0
-#endif
-
 #define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024))
 #define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024))
 
 /* We consider a memory access nontemporal if it is not reused sooner than
    after L2_CACHE_SIZE_BYTES of memory are accessed.  However, we ignore
@@ -228,15 +226,16 @@
 {
   tree base;			/* Base of the reference.  */
   tree step;			/* Step of the reference.  */
   struct mem_ref *refs;		/* References in the group.  */
   struct mem_ref_group *next;	/* Next group of references.  */
+  unsigned int uid;		/* Group UID, used only for debugging.  */
 };
 
 /* Assigned to PREFETCH_BEFORE when all iterations are to be prefetched.  */
 
-#define PREFETCH_ALL (~(unsigned HOST_WIDE_INT) 0)
+#define PREFETCH_ALL HOST_WIDE_INT_M1U
 
 /* Do not generate a prefetch if the unroll factor is significantly less
    than what is required by the prefetch.  This is to avoid redundant
    prefetches.  For example, when prefetch_mod is 16 and unroll_factor is
    2, prefetching requires unrolling the loop 16 times, but
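
The new PREFETCH_ALL spelling is the same all-ones value written through the
HOST_WIDE_INT_M1U macro, i.e. (unsigned HOST_WIDE_INT) -1.  A standalone
sketch of the equivalence, modeling HOST_WIDE_INT as long long:

#include <assert.h>

int
main (void)
{
  /* -1 converted to an unsigned type yields the all-ones value, so the
     old and new definitions are identical bit patterns.  */
  unsigned long long m1u = (unsigned long long) -1;
  unsigned long long all_ones = ~(unsigned long long) 0;
  assert (m1u == all_ones);
  return 0;
}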
@@ -257,11 +256,11 @@
 
 /* The memory reference.  */
 
 struct mem_ref
 {
-  gimple stmt;			/* Statement in that the reference appears.  */
+  gimple *stmt;			/* Statement in that the reference appears.  */
   tree mem;			/* The reference.  */
   HOST_WIDE_INT delta;		/* Constant offset of the reference.  */
   struct mem_ref_group *group;	/* The group of references it belongs to.  */
   unsigned HOST_WIDE_INT prefetch_mod;
 				/* Prefetch only each PREFETCH_MOD-th
@@ -270,86 +269,96 @@
 				/* Prefetch only first PREFETCH_BEFORE
 				   iterations.  */
   unsigned reuse_distance;	/* The amount of data accessed before the first
 				   reuse of this value.  */
   struct mem_ref *next;		/* The next reference in the group.  */
+  unsigned int uid;		/* Ref UID, used only for debugging.  */
   unsigned write_p : 1;		/* Is it a write?  */
   unsigned independent_p : 1;	/* True if the reference is independent on
 				   all other references inside the loop.  */
   unsigned issue_prefetch_p : 1;	/* Should we really issue the prefetch?  */
   unsigned storent_p : 1;	/* True if we changed the store to a
 				   nontemporal one.  */
 };
 
+/* Dumps information about memory reference */
+static void
+dump_mem_details (FILE *file, tree base, tree step,
+		  HOST_WIDE_INT delta, bool write_p)
+{
+  fprintf (file, "(base ");
+  print_generic_expr (file, base, TDF_SLIM);
+  fprintf (file, ", step ");
+  if (cst_and_fits_in_hwi (step))
+    fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (step));
+  else
+    print_generic_expr (file, step, TDF_SLIM);
+  fprintf (file, ")\n");
+  fprintf (file, "  delta " HOST_WIDE_INT_PRINT_DEC "\n", delta);
+  fprintf (file, "  %s\n\n", write_p ? "write" : "read");
+}
+
 /* Dumps information about reference REF to FILE.  */
 
 static void
 dump_mem_ref (FILE *file, struct mem_ref *ref)
 {
-  fprintf (file, "Reference %p:\n", (void *) ref);
-
-  fprintf (file, "  group %p (base ", (void *) ref->group);
-  print_generic_expr (file, ref->group->base, TDF_SLIM);
-  fprintf (file, ", step ");
-  if (cst_and_fits_in_hwi (ref->group->step))
-    fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (ref->group->step));
-  else
-    print_generic_expr (file, ref->group->step, TDF_TREE);
+  fprintf (file, "reference %u:%u (", ref->group->uid, ref->uid);
+  print_generic_expr (file, ref->mem, TDF_SLIM);
   fprintf (file, ")\n");
-
-  fprintf (file, "  delta ");
-  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->delta);
-  fprintf (file, "\n");
-
-  fprintf (file, "  %s\n", ref->write_p ? "write" : "read");
-
-  fprintf (file, "\n");
 }
 
 /* Finds a group with BASE and STEP in GROUPS, or creates one if it does not
    exist.  */
 
 static struct mem_ref_group *
 find_or_create_group (struct mem_ref_group **groups, tree base, tree step)
 {
+  /* Global count for setting struct mem_ref_group->uid.  */
+  static unsigned int last_mem_ref_group_uid = 0;
+
   struct mem_ref_group *group;
 
   for (; *groups; groups = &(*groups)->next)
     {
       if (operand_equal_p ((*groups)->step, step, 0)
	   && operand_equal_p ((*groups)->base, base, 0))
	 return *groups;
 
       /* If step is an integer constant, keep the list of groups sorted
	  by decreasing step.  */
       if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step)
	   && int_cst_value ((*groups)->step) < int_cst_value (step))
	 break;
     }
 
   group = XNEW (struct mem_ref_group);
   group->base = base;
   group->step = step;
   group->refs = NULL;
+  group->uid = ++last_mem_ref_group_uid;
   group->next = *groups;
   *groups = group;
 
   return group;
 }
 
 /* Records a memory reference MEM in GROUP with offset DELTA and write status
    WRITE_P.  The reference occurs in statement STMT.  */
 
 static void
-record_ref (struct mem_ref_group *group, gimple stmt, tree mem,
+record_ref (struct mem_ref_group *group, gimple *stmt, tree mem,
	     HOST_WIDE_INT delta, bool write_p)
 {
+  unsigned int last_mem_ref_uid = 0;
   struct mem_ref **aref;
 
   /* Do not record the same address twice.  */
   for (aref = &group->refs; *aref; aref = &(*aref)->next)
     {
+      last_mem_ref_uid = (*aref)->uid;
+
       /* It does not have to be possible for write reference to reuse the read
	  prefetch, or vice versa.  */
       if (!WRITE_CAN_USE_READ_PREFETCH
	   && write_p
	   && !(*aref)->write_p)
@@ -374,13 +383,20 @@
       (*aref)->issue_prefetch_p = false;
       (*aref)->group = group;
       (*aref)->next = NULL;
       (*aref)->independent_p = false;
       (*aref)->storent_p = false;
+      (*aref)->uid = last_mem_ref_uid + 1;
 
       if (dump_file && (dump_flags & TDF_DETAILS))
-	dump_mem_ref (dump_file, *aref);
+	{
+	  dump_mem_ref (dump_file, *aref);
+
+	  fprintf (dump_file, "  group %u ", group->uid);
+	  dump_mem_details (dump_file, group->base, group->step, delta,
+			    write_p);
+	}
 }
 
 /* Release memory references in GROUPS.  */
 
 static void
@@ -404,11 +420,11 @@
 /* A structure used to pass arguments to idx_analyze_ref.  */
 
 struct ar_data
 {
   struct loop *loop;		/* Loop of the reference.  */
-  gimple stmt;			/* Statement of the reference.  */
+  gimple *stmt;			/* Statement of the reference.  */
   tree *step;			/* Step of the memory reference.  */
   HOST_WIDE_INT *delta;		/* Offset of the memory reference.  */
 };
 
 /* Analyzes a single INDEX of a memory reference to obtain information
@@ -470,11 +486,11 @@
    references from REF_P.  */
 
 static bool
 analyze_ref (struct loop *loop, tree *ref_p, tree *base,
	      tree *step, HOST_WIDE_INT *delta,
-	      gimple stmt)
+	      gimple *stmt)
 {
   struct ar_data ar_data;
   tree off;
   HOST_WIDE_INT bit_offset;
   tree ref = *ref_p;
@@ -518,11 +534,11 @@
    LOOP in statement STMT and it is write if WRITE_P.  Returns true if the
    reference was recorded, false otherwise.  */
 
 static bool
 gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
-			       tree ref, bool write_p, gimple stmt)
+			       tree ref, bool write_p, gimple *stmt)
 {
   tree base, step;
   HOST_WIDE_INT delta;
   struct mem_ref_group *agrp;
 
@@ -537,13 +553,48 @@
 
   /* Stop if the address of BASE could not be taken.  */
   if (may_be_nonaddressable_p (base))
     return false;
 
-  /* Limit non-constant step prefetching only to the innermost loops.  */
-  if (!cst_and_fits_in_hwi (step) && loop->inner != NULL)
-    return false;
+  /* Limit non-constant step prefetching only to the innermost loops and
+     only when the step is loop invariant in the entire loop nest.  */
+  if (!cst_and_fits_in_hwi (step))
+    {
+      if (loop->inner != NULL)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Memory expression %p\n",(void *) ref );
+	      print_generic_expr (dump_file, ref, TDF_SLIM);
+	      fprintf (dump_file,":");
+	      dump_mem_details (dump_file, base, step, delta, write_p);
+	      fprintf (dump_file,
+		       "Ignoring %p, non-constant step prefetching is "
+		       "limited to inner most loops \n",
+		       (void *) ref);
+	    }
+	  return false;
+	}
+      else
+	{
+	  if (!expr_invariant_in_loop_p (loop_outermost (loop), step))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fprintf (dump_file, "Memory expression %p\n",(void *) ref );
+		  print_generic_expr (dump_file, ref, TDF_SLIM);
+		  fprintf (dump_file,":");
+		  dump_mem_details (dump_file, base, step, delta, write_p);
+		  fprintf (dump_file,
+			   "Not prefetching, ignoring %p due to "
+			   "loop variant step\n",
+			   (void *) ref);
+		}
+	      return false;
+	    }
+	}
+    }
 
   /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
      are integer constants.  */
   agrp = find_or_create_group (refs, base, step);
   record_ref (agrp, stmt, ref, delta, write_p);
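
A hypothetical illustration (not from GCC) of the loop-invariance requirement
the hunk above introduces for non-constant steps:

void
f (double *a, int n, int m, int stride)
{
  for (int i = 0; i < n; i++)
    for (int j = 0; j < m; j++)
      a[j * stride] += 1.0;	/* step = stride: non-constant but invariant
				   in the whole nest, so the innermost loop
				   may still be prefetched.  */

  for (int i = 0; i < n; i++)
    for (int j = 0; j < m; j++)
      a[j * i] += 1.0;		/* step = i: varies with the outer loop, so
				   the reference is now rejected.  */
}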
@@ -559,11 +610,11 @@
 {
   basic_block *body = get_loop_body_in_dom_order (loop);
   basic_block bb;
   unsigned i;
   gimple_stmt_iterator bsi;
-  gimple stmt;
+  gimple *stmt;
   tree lhs, rhs;
   struct mem_ref_group *refs = NULL;
 
   *no_other_refs = true;
   *ref_count = 0;
@@ -586,10 +637,13 @@
	       || (is_gimple_call (stmt)
		   && !(gimple_call_flags (stmt) & ECF_CONST)))
	     *no_other_refs = false;
	   continue;
	 }
+
+      if (! gimple_vuse (stmt))
+	continue;
 
       lhs = gimple_assign_lhs (stmt);
       rhs = gimple_assign_rhs1 (stmt);
 
       if (REFERENCE_CLASS_P (rhs))
@@ -656,13 +710,13 @@
 ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
 {
   gcc_assert (by > 0);
 
   if (x >= 0)
-    return x / by;
+    return x / (HOST_WIDE_INT) by;
   else
-    return (x + by - 1) / by;
+    return (x + (HOST_WIDE_INT) by - 1) / (HOST_WIDE_INT) by;
 }
 
 /* Given a CACHE_LINE_SIZE and two inductive memory references
    with a common STEP greater than CACHE_LINE_SIZE and an address
    difference DELTA, compute the probability that they will fall
@@ -792,11 +846,11 @@
   /* The accesses are sure to meet.  Let us check when.  */
   hit_from = ddown (delta_b, PREFETCH_BLOCK) * PREFETCH_BLOCK;
   prefetch_before = (hit_from - delta_r + step - 1) / step;
 
   /* Do not reduce prefetch_before if we meet beyond cache size.  */
-  if (prefetch_before > (unsigned) abs (L2_CACHE_SIZE_BYTES / step))
+  if (prefetch_before > absu_hwi (L2_CACHE_SIZE_BYTES / step))
     prefetch_before = PREFETCH_ALL;
   if (prefetch_before < ref->prefetch_before)
     ref->prefetch_before = prefetch_before;
 
   return;
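
The prefetch_before computation above uses the standard round-up division
idiom: for a >= 0 and b > 0, (a + b - 1) / b equals the ceiling of a / b.
A self-contained check of the idiom:

#include <assert.h>

static unsigned long
ceil_div (unsigned long a, unsigned long b)
{
  return (a + b - 1) / b;	/* round-up division, b > 0 */
}

int
main (void)
{
  assert (ceil_div (0, 8) == 0);
  assert (ceil_div (1, 8) == 1);
  assert (ceil_div (16, 8) == 2);
  assert (ceil_div (17, 8) == 3);
  return 0;
}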
@@ -893,11 +947,11 @@
     {
       prune_ref_by_reuse (ref_pruned, group->refs);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
	 {
-	   fprintf (dump_file, "Reference %p:", (void *) ref_pruned);
+	   dump_mem_ref (dump_file, ref_pruned);
 
	   if (ref_pruned->prefetch_before == PREFETCH_ALL
	       && ref_pruned->prefetch_mod == 1)
	     fprintf (dump_file, " no restrictions");
	   else if (ref_pruned->prefetch_before == 0)
@@ -941,20 +995,20 @@
   /* For now do not issue prefetches for only first few of the
      iterations.  */
   if (ref->prefetch_before != PREFETCH_ALL)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
-	 fprintf (dump_file, "Ignoring %p due to prefetch_before\n",
-		  (void *) ref);
+	 fprintf (dump_file, "Ignoring reference %u:%u due to prefetch_before\n",
+		  ref->group->uid, ref->uid);
       return false;
     }
 
   /* Do not prefetch nontemporal stores.  */
   if (ref->storent_p)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
-	 fprintf (dump_file, "Ignoring nontemporal store %p\n", (void *) ref);
+	 fprintf (dump_file, "Ignoring nontemporal store reference %u:%u\n", ref->group->uid, ref->uid);
       return false;
     }
 
   return true;
 }
@@ -1013,11 +1067,18 @@
       /* If more than half of the prefetches would be lost anyway, do not
	  issue the prefetch.  */
       if (2 * remaining_prefetch_slots < prefetch_slots)
	 continue;
 
+      /* Stop prefetching if debug counter is activated.  */
+      if (!dbg_cnt (prefetch))
+	continue;
+
       ref->issue_prefetch_p = true;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Decided to issue prefetch for reference %u:%u\n",
+		 ref->group->uid, ref->uid);
 
       if (remaining_prefetch_slots <= prefetch_slots)
	 return true;
       remaining_prefetch_slots -= prefetch_slots;
       any = true;
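
The dbg_cnt (prefetch) check wires the issue decision into GCC's debug-counter
machinery; assuming the counter is registered under the name prefetch in
dbgcnt.def, a regression can then be bisected to an individual prefetch
decision by capping the counter on the command line with something like
-fdbg-cnt=prefetch:10, which allows only the first ten prefetches.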
@@ -1071,19 +1132,19 @@
 static void
 issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
 {
   HOST_WIDE_INT delta;
   tree addr, addr_base, write_p, local, forward;
-  gimple prefetch;
+  gcall *prefetch;
   gimple_stmt_iterator bsi;
   unsigned n_prefetches, ap;
   bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Issued%s prefetch for %p.\n",
+    fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
	      nontemporal ? " nontemporal" : "",
-	      (void *) ref);
+	      ref->group->uid, ref->uid);
 
   bsi = gsi_for_stmt (ref->stmt);
 
   n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
		   / ref->prefetch_mod);
@@ -1098,29 +1159,39 @@
       if (cst_and_fits_in_hwi (ref->group->step))
	 {
	   /* Determine the address to prefetch.  */
	   delta = (ahead + ap * ref->prefetch_mod) *
		    int_cst_value (ref->group->step);
-	   addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node,
-			       addr_base, size_int (delta));
-	   addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL,
-					    true, GSI_SAME_STMT);
+	   addr = fold_build_pointer_plus_hwi (addr_base, delta);
+	   addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+					    NULL, true, GSI_SAME_STMT);
	 }
       else
	 {
	   /* The step size is non-constant but loop-invariant.  We use the
	      heuristic to simply prefetch ahead iterations ahead.  */
	   forward = fold_build2 (MULT_EXPR, sizetype,
				  fold_convert (sizetype, ref->group->step),
				  fold_convert (sizetype, size_int (ahead)));
-	   addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, addr_base,
-			       forward);
+	   addr = fold_build_pointer_plus (addr_base, forward);
	   addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
					    NULL, true, GSI_SAME_STMT);
	 }
+
+      if (addr_base != addr
+	  && TREE_CODE (addr_base) == SSA_NAME
+	  && TREE_CODE (addr) == SSA_NAME)
+	{
+	  duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base));
+	  /* As this isn't a plain copy we have to reset alignment
+	     information.  */
+	  if (SSA_NAME_PTR_INFO (addr))
+	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr));
+	}
+
       /* Create the prefetch instruction.  */
-      prefetch = gimple_build_call (built_in_decls[BUILT_IN_PREFETCH],
+      prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
				     3, addr, write_p, local);
       gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT);
     }
 }
 
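
The generated call takes three operands: the address, write_p as
__builtin_prefetch's rw argument, and local as its locality.  A source-level
sketch of the two flavors (assuming, as the reuse_distance test above
suggests, that nontemporal references get locality 0 and others the
default 3):

void
touch (int *p, const int *q)
{
  __builtin_prefetch (q, 0, 3);	/* read prefetch, keep in all cache levels */
  __builtin_prefetch (p, 1, 0);	/* write prefetch, nontemporal: minimal
				   cache pollution */
}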
@@ -1144,11 +1215,11 @@
    can be used.  */
 
 static bool
 nontemporal_store_p (struct mem_ref *ref)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   enum insn_code code;
 
   /* REF must be a write that is not reused.  We require it to be independent
      on all other memory references in the loop, as the nontemporal stores may
      be reordered with respect to other memory references.  */
@@ -1174,12 +1245,12 @@
 {
   if (!nontemporal_store_p (ref))
     return false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
-	      (void *) ref);
+    fprintf (dump_file, "Marked reference %u:%u as a nontemporal store.\n",
+	      ref->group->uid, ref->uid);
 
   gimple_assign_set_nontemporal_move (ref->stmt, true);
   ref->storent_p = true;
 
   return true;
@@ -1188,17 +1259,17 @@
 /* Issue a memory fence instruction after LOOP.  */
 
 static void
 emit_mfence_after_loop (struct loop *loop)
 {
-  VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+  vec<edge> exits = get_loop_exit_edges (loop);
   edge exit;
-  gimple call;
+  gcall *call;
   gimple_stmt_iterator bsi;
   unsigned i;
 
-  FOR_EACH_VEC_ELT (edge, exits, i, exit)
+  FOR_EACH_VEC_ELT (exits, i, exit)
     {
       call = gimple_build_call (FENCE_FOLLOWING_MOVNT, 0);
 
       if (!single_pred_p (exit->dest)
	   /* If possible, we prefer not to insert the fence on other paths
@@ -1206,14 +1277,13 @@
	   && !(exit->flags & EDGE_ABNORMAL))
	 split_loop_exit_edge (exit);
       bsi = gsi_after_labels (exit->dest);
 
       gsi_insert_before (&bsi, call, GSI_NEW_STMT);
-      mark_virtual_ops_for_renaming (call);
-    }
-
-  VEC_free (edge, heap, exits);
+    }
+
+  exits.release ();
   update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* Returns true if we can use storent in loop, false otherwise.  */
 
@@ -1227,20 +1297,20 @@
 
   /* If we must issue a mfence insn after using storent, check that there
      is a suitable place for it at each of the loop exits.  */
   if (FENCE_FOLLOWING_MOVNT != NULL_TREE)
     {
-      VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+      vec<edge> exits = get_loop_exit_edges (loop);
       unsigned i;
       edge exit;
 
-      FOR_EACH_VEC_ELT (edge, exits, i, exit)
+      FOR_EACH_VEC_ELT (exits, i, exit)
	 if ((exit->flags & EDGE_ABNORMAL)
-	     && exit->dest == EXIT_BLOCK_PTR)
+	     && exit->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	   ret = false;
 
-      VEC_free (edge, heap, exits);
+      exits.release ();
     }
 
   return ret;
 }
 
@@ -1286,11 +1356,11 @@
       return true;
     }
 
 /* Determine the coefficient by that unroll LOOP, from the information
    contained in the list of memory references REFS.  Description of
-   umber of iterations of LOOP is stored to DESC.  NINSNS is the number of
+   number of iterations of LOOP is stored to DESC.  NINSNS is the number of
    insns of the LOOP.  EST_NITER is the estimated number of iterations of
    the loop, or -1 if no estimate is available.  */
 
 static unsigned
 determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
@@ -1410,12 +1480,12 @@
	 access_fn = CHREC_LEFT (access_fn);
 
       if ((unsigned) loop_depth (aloop) <= min_depth)
	 continue;
 
-      if (host_integerp (step, 0))
-	 astep = tree_low_cst (step, 0);
+      if (tree_fits_shwi_p (step))
+	 astep = tree_to_shwi (step);
       else
	 astep = L1_CACHE_LINE_SIZE;
 
       strides[n - 1 - loop_depth (loop) + loop_depth (aloop)] += astep * stride;
 
@@ -1431,11 +1501,11 @@
 self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
		      struct loop *loop)
 {
   tree stride, access_fn;
   HOST_WIDE_INT *strides, astride;
-  VEC (tree, heap) *access_fns;
+  vec<tree> access_fns;
   tree ref = DR_REF (dr);
   unsigned i, ret = ~0u;
 
   /* In the following example:
 
@@ -1450,22 +1520,22 @@
    the innermost loop in that the stride is less than cache size.  */
 
   strides = XCNEWVEC (HOST_WIDE_INT, n);
   access_fns = DR_ACCESS_FNS (dr);
 
-  FOR_EACH_VEC_ELT (tree, access_fns, i, access_fn)
+  FOR_EACH_VEC_ELT (access_fns, i, access_fn)
     {
       /* Keep track of the reference corresponding to the subscript, so that we
	  know its stride.  */
       while (handled_component_p (ref) && TREE_CODE (ref) != ARRAY_REF)
	 ref = TREE_OPERAND (ref, 0);
 
       if (TREE_CODE (ref) == ARRAY_REF)
	 {
	   stride = TYPE_SIZE_UNIT (TREE_TYPE (ref));
-	   if (host_integerp (stride, 1))
-	     astride = tree_low_cst (stride, 1);
+	   if (tree_fits_uhwi_p (stride))
+	     astride = tree_to_uhwi (stride);
	   else
	     astride = L1_CACHE_LINE_SIZE;
 
	   ref = TREE_OPERAND (ref, 0);
	 }
@@ -1494,31 +1564,31 @@
   return ret;
 }
 
 /* Determines the distance till the first reuse of each reference in REFS
    in the loop nest of LOOP.  NO_OTHER_REFS is true if there are no other
-   memory references in the loop.  */
+   memory references in the loop.  Return false if the analysis fails.  */
 
-static void
+static bool
 determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
			    bool no_other_refs)
 {
   struct loop *nest, *aloop;
-  VEC (data_reference_p, heap) *datarefs = NULL;
-  VEC (ddr_p, heap) *dependences = NULL;
+  vec<data_reference_p> datarefs = vNULL;
+  vec<ddr_p> dependences = vNULL;
   struct mem_ref_group *gr;
   struct mem_ref *ref, *refb;
-  VEC (loop_p, heap) *vloops = NULL;
+  auto_vec<loop_p> vloops;
   unsigned *loop_data_size;
   unsigned i, j, n;
   unsigned volume, dist, adist;
   HOST_WIDE_INT vol;
   data_reference_p dr;
   ddr_p dep;
 
   if (loop->inner)
-    return;
+    return true;
 
   /* Find the outermost loop of the loop nest of loop (we require that
      there are no sibling loops inside the nest).  */
   nest = loop;
   while (1)
@@ -1534,11 +1604,11 @@
 
   /* For each loop, determine the amount of data accessed in each iteration.
      We use this to estimate whether the reference is evicted from the
      cache before its reuse.  */
   find_loop_nest (nest, &vloops);
-  n = VEC_length (loop_p, vloops);
+  n = vloops.length ();
   loop_data_size = XNEWVEC (unsigned, n);
   volume = volume_of_references (refs);
   i = n;
   while (i-- != 0)
     {
@@ -1546,11 +1616,11 @@
       /* Bound the volume by the L2 cache size, since above this bound,
	  all dependence distances are equivalent.  */
       if (volume > L2_CACHE_SIZE_BYTES)
	 continue;
 
-      aloop = VEC_index (loop_p, vloops, i);
-      vol = estimated_loop_iterations_int (aloop, false);
-      if (vol < 0)
+      aloop = vloops[i];
+      vol = estimated_stmt_executions_int (aloop);
+      if (vol == -1)
	 vol = expected_loop_iterations (aloop);
       volume *= vol;
     }
@@ -1560,45 +1630,48 @@
      are used just as a heuristics to estimate temporality of the
      references, hence we do not need to worry about correctness).  */
   for (gr = refs; gr; gr = gr->next)
     for (ref = gr->refs; ref; ref = ref->next)
       {
-	 dr = create_data_ref (nest, loop_containing_stmt (ref->stmt),
-			       ref->mem, ref->stmt, !ref->write_p);
+	 dr = create_data_ref (loop_preheader_edge (nest),
+			       loop_containing_stmt (ref->stmt),
+			       ref->mem, ref->stmt, !ref->write_p, false);
 
	 if (dr)
	   {
	     ref->reuse_distance = volume;
	     dr->aux = ref;
-	     VEC_safe_push (data_reference_p, heap, datarefs, dr);
+	     datarefs.safe_push (dr);
	   }
	 else
	   no_other_refs = false;
       }
 
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       dist = self_reuse_distance (dr, loop_data_size, n, loop);
       ref = (struct mem_ref *) dr->aux;
       if (ref->reuse_distance > dist)
	 ref->reuse_distance = dist;
 
       if (no_other_refs)
	 ref->independent_p = true;
     }
 
-  compute_all_dependences (datarefs, &dependences, vloops, true);
-
-  FOR_EACH_VEC_ELT (ddr_p, dependences, i, dep)
+  if (!compute_all_dependences (datarefs, &dependences, vloops, true))
+    return false;
+
+  FOR_EACH_VEC_ELT (dependences, i, dep)
     {
       if (DDR_ARE_DEPENDENT (dep) == chrec_known)
	 continue;
 
       ref = (struct mem_ref *) DDR_A (dep)->aux;
       refb = (struct mem_ref *) DDR_B (dep)->aux;
 
       if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
+	   || DDR_COULD_BE_INDEPENDENT_P (dep)
	   || DDR_NUM_DIST_VECTS (dep) == 0)
	 {
	   /* If the dependence cannot be analyzed, assume that there might be
	      a reuse.  */
	   dist = 0;
@@ -1660,13 +1733,15 @@
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Reuse distances:\n");
       for (gr = refs; gr; gr = gr->next)
	 for (ref = gr->refs; ref; ref = ref->next)
-	   fprintf (dump_file, " ref %p distance %u\n",
-		    (void *) ref, ref->reuse_distance);
+	   fprintf (dump_file, " reference %u:%u distance %u\n",
+		    ref->group->uid, ref->uid, ref->reuse_distance);
     }
+
+  return true;
 }
 
 /* Determine whether or not the trip count to ahead ratio is too small based
    on prefitablility consideration.
    AHEAD: the iteration ahead distance,
@@ -1799,11 +1874,13 @@
   time = tree_num_loop_insns (loop, &eni_time_weights);
   if (time == 0)
     return false;
 
   ahead = (PREFETCH_LATENCY + time - 1) / time;
-  est_niter = estimated_loop_iterations_int (loop, false);
+  est_niter = estimated_stmt_executions_int (loop);
+  if (est_niter == -1)
+    est_niter = likely_max_stmt_executions_int (loop);
 
   /* Prefetching is not likely to be profitable if the trip count to ahead
      ratio is too small.  */
   if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter))
     return false;
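
As a worked instance of the ahead computation above (with hypothetical
numbers): if the loop body cost is time = 7 and PREFETCH_LATENCY is 200,
then ahead = (200 + 7 - 1) / 7 = 29, so prefetches must run about 29
iterations in front of the loads; a loop whose trip count estimate (now
falling back to the likely-max estimate) is small relative to that ratio
is rejected as unprofitable.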
@@ -1823,11 +1900,12 @@
   prune_by_reuse (refs);
 
   if (nothing_to_prefetch_p (refs))
     goto fail;
 
-  determine_loop_nest_reuse (loop, refs, no_other_refs);
+  if (!determine_loop_nest_reuse (loop, refs, no_other_refs))
+    goto fail;
 
   /* Step 3: determine unroll factor.  */
   unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
					    est_niter);
 
@@ -1875,20 +1953,19 @@
 /* Issue prefetch instructions for array references in loops.  */
 
 unsigned int
 tree_ssa_prefetch_arrays (void)
 {
-  loop_iterator li;
   struct loop *loop;
   bool unrolled = false;
   int todo_flags = 0;
 
-  if (!HAVE_prefetch
+  if (!targetm.have_prefetch ()
       /* It is possible to ask compiler for say -mtune=i486 -march=pentium4.
	  -mtune=i486 causes us having PREFETCH_BLOCK 0, since this is part
	  of processor costs and i486 does not have prefetch, but
-	  -march=pentium4 causes HAVE_prefetch to be true.  Ugh.  */
+	  -march=pentium4 causes targetm.have_prefetch to be true.  Ugh.  */
       || PREFETCH_BLOCK == 0)
     return 0;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1908,26 +1985,22 @@
       fprintf (dump_file, "\n");
     }
 
   initialize_original_copy_tables ();
 
-  if (!built_in_decls[BUILT_IN_PREFETCH])
+  if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH))
     {
       tree type = build_function_type_list (void_type_node,
					     const_ptr_type_node, NULL_TREE);
       tree decl = add_builtin_function ("__builtin_prefetch", type,
					 BUILT_IN_PREFETCH, BUILT_IN_NORMAL,
					 NULL, NULL_TREE);
       DECL_IS_NOVOPS (decl) = true;
-      built_in_decls[BUILT_IN_PREFETCH] = decl;
+      set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
     }
 
-  /* We assume that size of cache line is a power of two, so verify this
-     here.  */
-  gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0);
-
-  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
	 fprintf (dump_file, "Processing loop %d:\n", loop->num);
 
       unrolled |= loop_prefetch_arrays (loop);
@@ -1943,5 +2016,68 @@
     }
 
   free_original_copy_tables ();
   return todo_flags;
 }
+
+/* Prefetching.  */
+
+namespace {
+
+const pass_data pass_data_loop_prefetch =
+{
+  GIMPLE_PASS, /* type */
+  "aprefetch", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_PREFETCH, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_loop_prefetch : public gimple_opt_pass
+{
+public:
+  pass_loop_prefetch (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_loop_prefetch, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_prefetch_loop_arrays > 0; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_loop_prefetch
+
+unsigned int
+pass_loop_prefetch::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0)
+    {
+      static bool warned = false;
+
+      if (!warned)
+	{
+	  warning (OPT_Wdisabled_optimization,
+		   "%<l1-cache-size%> parameter is not a power of two %d",
+		   PREFETCH_BLOCK);
+	  warned = true;
+	}
+      return 0;
+    }
+
+  return tree_ssa_prefetch_arrays ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_loop_prefetch (gcc::context *ctxt)
+{
+  return new pass_loop_prefetch (ctxt);
+}
+
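
The new execute gate replaces the old gcc_assert on the cache line size with
a warning; the test (x & (x - 1)) != 0 is the standard power-of-two check.
A self-contained sketch of the idiom:

#include <assert.h>

/* For x > 0, x & (x - 1) clears the lowest set bit, so the result is
   zero exactly when x has a single bit set, i.e. is a power of two.  */
static int
pow2_p (unsigned x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

int
main (void)
{
  assert (pow2_p (64));
  assert (!pow2_p (48));
  assert (!pow2_p (0));
  return 0;
}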