comparison gcc/tree-ssa-loop-prefetch.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
comparing revisions 131:84e7813d76e9 and 145:1830386684a0
@@ -1,7 +1,7 @@
 /* Array prefetching.
-   Copyright (C) 2005-2018 Free Software Foundation, Inc.
+   Copyright (C) 2005-2020 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
 GCC is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
@@ -41,11 +41,10 @@
 #include "tree-ssa-loop.h"
 #include "ssa.h"
 #include "tree-into-ssa.h"
 #include "cfgloop.h"
 #include "tree-scalar-evolution.h"
-#include "params.h"
 #include "langhooks.h"
 #include "tree-inline.h"
 #include "tree-data-ref.h"
 #include "diagnostic-core.h"
 #include "dbgcnt.h"
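The dropped params.h header provided the PARAM_VALUE (PARAM_*) accessor macros; throughout the rest of this changeset those accesses are replaced by param_* globals (see for instance the PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) hunk further down), and the tuning macros of this file are redefined on top of them. A minimal sketch of the two styles, using a hypothetical helper name:

    /* Hypothetical helper, not part of the patch: only the second form
       still compiles once params.h is gone.  */
    static unsigned
    prefetch_block_bytes (void)
    {
      /* Old interface:  return PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);  */
      return param_l1_cache_line_size;   /* new interface */
    }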
@@ -165,11 +164,11 @@
 /* The size of the block loaded by a single prefetch.  Usually, this is
    the same as cache line size (at the moment, we only consider one level
    of cache hierarchy). */
 
 #ifndef PREFETCH_BLOCK
-#define PREFETCH_BLOCK L1_CACHE_LINE_SIZE
+#define PREFETCH_BLOCK param_l1_cache_line_size
 #endif
 
 /* Do we have a forward hardware sequential prefetching? */
 
 #ifndef HAVE_FORWARD_PREFETCH
@@ -189,12 +188,12 @@
 
 #ifndef ACCEPTABLE_MISS_RATE
 #define ACCEPTABLE_MISS_RATE 50
 #endif
 
-#define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024))
-#define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024))
+#define L1_CACHE_SIZE_BYTES ((unsigned) (param_l1_cache_size * 1024))
+#define L2_CACHE_SIZE_BYTES ((unsigned) (param_l2_cache_size * 1024))
 
 /* We consider a memory access nontemporal if it is not reused sooner than
    after L2_CACHE_SIZE_BYTES of memory are accessed.  However, we ignore
    accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
    so that we use nontemporal prefetches e.g. if single memory location
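The comment above defines nontemporal accesses in terms of the two byte thresholds just rebased onto the param_* globals. A minimal sketch of the L2 test only, with a hypothetical predicate name (the pass computes the reuse distance elsewhere, and reuses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION are discounted when that distance is formed):

    /* Hypothetical illustration, not code from the pass: a reference whose
       next reuse lies beyond the L2 working set is a candidate for a
       nontemporal prefetch.  */
    static bool
    reuse_beyond_l2_p (unsigned HOST_WIDE_INT bytes_accessed_before_reuse)
    {
      return bytes_accessed_before_reuse > L2_CACHE_SIZE_BYTES;
    }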
@@ -419,11 +418,11 @@
 
 /* A structure used to pass arguments to idx_analyze_ref. */
 
 struct ar_data
 {
-  struct loop *loop;		/* Loop of the reference. */
+  class loop *loop;		/* Loop of the reference. */
   gimple *stmt;			/* Statement of the reference. */
   tree *step;			/* Step of the memory reference. */
   HOST_WIDE_INT *delta;		/* Offset of the memory reference. */
 };
 
@@ -484,11 +483,11 @@
    STEP are integer constants and iter is number of iterations of LOOP.  The
    reference occurs in statement STMT.  Strips nonaddressable component
    references from REF_P. */
 
 static bool
-analyze_ref (struct loop *loop, tree *ref_p, tree *base,
+analyze_ref (class loop *loop, tree *ref_p, tree *base,
 	     tree *step, HOST_WIDE_INT *delta,
 	     gimple *stmt)
 {
   struct ar_data ar_data;
   tree off;
@@ -533,11 +532,11 @@
 /* Record a memory reference REF to the list REFS.  The reference occurs in
    LOOP in statement STMT and it is write if WRITE_P.  Returns true if the
    reference was recorded, false otherwise. */
 
 static bool
-gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
+gather_memory_references_ref (class loop *loop, struct mem_ref_group **refs,
 			      tree ref, bool write_p, gimple *stmt)
 {
   tree base, step;
   HOST_WIDE_INT delta;
   struct mem_ref_group *agrp;
@@ -604,11 +603,11 @@
 
 /* Record the suitable memory references in LOOP.  NO_OTHER_REFS is set to
    true if there are no other memory references inside the loop. */
 
 static struct mem_ref_group *
-gather_memory_references (struct loop *loop, bool *no_other_refs, unsigned *ref_count)
+gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_count)
 {
   basic_block *body = get_loop_body_in_dom_order (loop);
   basic_block bb;
   unsigned i;
   gimple_stmt_iterator bsi;
@@ -991,11 +990,12 @@
 
 static bool
 should_issue_prefetch_p (struct mem_ref *ref)
 {
   /* Do we want to issue prefetches for non-constant strides? */
-  if (!cst_and_fits_in_hwi (ref->group->step) && PREFETCH_DYNAMIC_STRIDES == 0)
+  if (!cst_and_fits_in_hwi (ref->group->step)
+      && param_prefetch_dynamic_strides == 0)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Skipping non-constant step for reference %u:%u\n",
 		 ref->group->uid, ref->uid);
@@ -1006,18 +1006,18 @@
      prefetch hints for a range of strides.  Make sure we don't issue
      prefetches for such cases if the stride is within this particular
      range. */
   if (cst_and_fits_in_hwi (ref->group->step)
       && abs_hwi (int_cst_value (ref->group->step))
-	 < (HOST_WIDE_INT) PREFETCH_MINIMUM_STRIDE)
+	 < (HOST_WIDE_INT) param_prefetch_minimum_stride)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Step for reference %u:%u (" HOST_WIDE_INT_PRINT_DEC
 		 ") is less than the mininum required stride of %d\n",
 		 ref->group->uid, ref->uid, int_cst_value (ref->group->step),
-		 PREFETCH_MINIMUM_STRIDE);
+		 param_prefetch_minimum_stride);
       return false;
     }
 
   /* For now do not issue prefetches for only first few of the
      iterations. */
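Taken together, the two gates above mean a reference only gets a software prefetch when its stride is either constant and at least param_prefetch_minimum_stride bytes, or non-constant with param_prefetch_dynamic_strides enabled. A distilled sketch with a hypothetical helper name and made-up numbers:

    /* Hypothetical distillation of the two checks; e.g. on a target whose
       hardware prefetcher already covers strides below 2048 bytes, a
       constant 64-byte step would be rejected here.  */
    static bool
    stride_wants_sw_prefetch_p (bool constant_step_p, HOST_WIDE_INT abs_step)
    {
      if (!constant_step_p)
	return param_prefetch_dynamic_strides != 0;
      return abs_step >= (HOST_WIDE_INT) param_prefetch_minimum_stride;
    }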
@@ -1053,12 +1053,13 @@
   unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots;
   unsigned slots_per_prefetch;
   struct mem_ref *ref;
   bool any = false;
 
-  /* At most SIMULTANEOUS_PREFETCHES should be running at the same time. */
-  remaining_prefetch_slots = SIMULTANEOUS_PREFETCHES;
+  /* At most param_simultaneous_prefetches should be running
+     at the same time. */
+  remaining_prefetch_slots = param_simultaneous_prefetches;
 
   /* The prefetch will run for AHEAD iterations of the original loop, i.e.,
      AHEAD / UNROLL_FACTOR iterations of the unrolled loop.  In each iteration,
      it will need a prefetch slot. */
   slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor;
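The rounded division above converts the prefetch-ahead distance, expressed in original-loop iterations, into unrolled-loop iterations, i.e. into the number of slots one issued prefetch occupies. With illustrative values only:

    /* Illustrative arithmetic, not code from the pass: ahead = 5 original
       iterations and unroll_factor = 2 give (5 + 2 / 2) / 2 = 3 slots held
       per issued prefetch.  */
    unsigned ahead = 5, unroll_factor = 2;
    unsigned slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor;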
@@ -1284,11 +1285,11 @@
 }
 
 /* Issue a memory fence instruction after LOOP. */
 
 static void
-emit_mfence_after_loop (struct loop *loop)
+emit_mfence_after_loop (class loop *loop)
 {
   vec<edge> exits = get_loop_exit_edges (loop);
   edge exit;
   gcall *call;
   gimple_stmt_iterator bsi;
@@ -1313,11 +1314,11 @@
 }
 
 /* Returns true if we can use storent in loop, false otherwise. */
 
 static bool
-may_use_storent_in_loop_p (struct loop *loop)
+may_use_storent_in_loop_p (class loop *loop)
 {
   bool ret = true;
 
   if (loop->inner != NULL)
     return false;
@@ -1343,11 +1344,11 @@
 
 /* Marks nontemporal stores in LOOP.  GROUPS contains the description of memory
    references in the loop. */
 
 static void
-mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups)
+mark_nontemporal_stores (class loop *loop, struct mem_ref_group *groups)
 {
   struct mem_ref *ref;
   bool any = false;
 
   if (!may_use_storent_in_loop_p (loop))
@@ -1364,11 +1365,11 @@
 /* Determines whether we can profitably unroll LOOP FACTOR times, and if
    this is the case, fill in DESC by the description of number of
    iterations. */
 
 static bool
-should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc,
+should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc,
 		      unsigned factor)
 {
   if (!can_unroll_loop_p (loop, factor, desc))
     return false;
 
@@ -1388,12 +1389,12 @@
    number of iterations of LOOP is stored to DESC.  NINSNS is the number of
    insns of the LOOP.  EST_NITER is the estimated number of iterations of
    the loop, or -1 if no estimate is available. */
 
 static unsigned
-determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
-			 unsigned ninsns, struct tree_niter_desc *desc,
+determine_unroll_factor (class loop *loop, struct mem_ref_group *refs,
+			 unsigned ninsns, class tree_niter_desc *desc,
 			 HOST_WIDE_INT est_niter)
 {
   unsigned upper_bound;
   unsigned nfactor, factor, mod_constraint;
   struct mem_ref_group *agp;
@@ -1404,11 +1405,11 @@
      from unrolling them enough to make exactly one cache line covered by each
      iteration.  Also, the goal of PARAM_MAX_UNROLL_TIMES is to prevent
      us from unrolling the loops too many times in cases where we only expect
      gains from better scheduling and decreasing loop overhead, which is not
      the case here. */
-  upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+  upper_bound = param_max_unrolled_insns / ninsns;
 
   /* If we unrolled the loop more times than it iterates, the unrolled version
      of the loop would be never entered. */
   if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound)
     upper_bound = est_niter;
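The bound above first limits the unroll factor by the code-growth budget and then clamps it by the expected trip count. With made-up numbers (the real budget is whatever param_max_unrolled_insns is on the target):

    /* Illustrative values only: a 25-insn body and a 200-insn unrolling
       budget allow at most 200 / 25 = 8 copies; an estimated trip count of
       6 then lowers the bound to 6.  */
    unsigned ninsns = 25, upper_bound = 200 / ninsns;	/* 8 */
    HOST_WIDE_INT est_niter = 6;
    if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound)
      upper_bound = est_niter;				/* 6 */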
@@ -1457,11 +1458,11 @@
 	/* If several iterations access the same cache line, use the size of
 	   the line divided by this number.  Otherwise, a cache line is
 	   accessed in each iteration.  TODO -- in the latter case, we should
 	   take the size of the reference into account, rounding it up on cache
 	   line size multiple. */
-	volume += L1_CACHE_LINE_SIZE / ref->prefetch_mod;
+	volume += param_l1_cache_line_size / ref->prefetch_mod;
       }
   return volume;
 }
 
 /* Returns the volume of memory references accessed across VEC iterations of
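In the volume accumulation just above, a reference whose prefetch is issued only every prefetch_mod iterations contributes just its share of a cache line per iteration. For instance, with illustrative values, a 64-byte line shared by prefetch_mod = 8 consecutive iterations adds 64 / 8 = 8 bytes to the estimated per-iteration memory traffic.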
@@ -1491,13 +1492,13 @@
    at the position corresponding to the loop of the step.  N is the depth
    of the considered loop nest, and, LOOP is its innermost loop. */
 
 static void
 add_subscript_strides (tree access_fn, unsigned stride,
-		       HOST_WIDE_INT *strides, unsigned n, struct loop *loop)
+		       HOST_WIDE_INT *strides, unsigned n, class loop *loop)
 {
-  struct loop *aloop;
+  class loop *aloop;
   tree step;
   HOST_WIDE_INT astep;
   unsigned min_depth = loop_depth (loop) - n;
 
   while (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
@@ -1510,11 +1511,11 @@
 	continue;
 
       if (tree_fits_shwi_p (step))
 	astep = tree_to_shwi (step);
       else
-	astep = L1_CACHE_LINE_SIZE;
+	astep = param_l1_cache_line_size;
 
       strides[n - 1 - loop_depth (loop) + loop_depth (aloop)] += astep * stride;
 
     }
 }
@@ -1524,11 +1525,11 @@
    loops, and LOOP_SIZES contains the volumes of accesses in each of the
    loops.  LOOP is the innermost loop of the current loop nest. */
 
 static unsigned
 self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
-		     struct loop *loop)
+		     class loop *loop)
 {
   tree stride, access_fn;
   HOST_WIDE_INT *strides, astride;
   vec<tree> access_fns;
   tree ref = DR_REF (dr);
@@ -1560,11 +1561,11 @@
 	{
 	  stride = TYPE_SIZE_UNIT (TREE_TYPE (ref));
 	  if (tree_fits_uhwi_p (stride))
 	    astride = tree_to_uhwi (stride);
 	  else
-	    astride = L1_CACHE_LINE_SIZE;
+	    astride = param_l1_cache_line_size;
 
 	  ref = TREE_OPERAND (ref, 0);
 	}
       else
 	astride = 1;
@@ -1576,11 +1577,11 @@
     {
       unsigned HOST_WIDE_INT s;
 
       s = strides[i] < 0 ? -strides[i] : strides[i];
 
-      if (s < (unsigned) L1_CACHE_LINE_SIZE
+      if (s < (unsigned) param_l1_cache_line_size
 	  && (loop_sizes[i]
 	      > (unsigned) (L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)))
 	{
 	  ret = loop_sizes[i];
 	  break;
@@ -1594,14 +1595,14 @@
 /* Determines the distance till the first reuse of each reference in REFS
    in the loop nest of LOOP.  NO_OTHER_REFS is true if there are no other
    memory references in the loop.  Return false if the analysis fails. */
 
 static bool
-determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
+determine_loop_nest_reuse (class loop *loop, struct mem_ref_group *refs,
 			   bool no_other_refs)
 {
-  struct loop *nest, *aloop;
+  class loop *nest, *aloop;
   vec<data_reference_p> datarefs = vNULL;
   vec<ddr_p> dependences = vNULL;
   struct mem_ref_group *gr;
   struct mem_ref *ref, *refb;
   auto_vec<loop_p> vloops;
@@ -1823,11 +1824,11 @@
      TODO: Implement a more precise computation by estimating the time
      for each CPU or memory op in the loop.  Time estimates for memory ops
      should account for cache misses. */
   insn_to_mem_ratio = ninsns / mem_ref_count;
 
-  if (insn_to_mem_ratio < PREFETCH_MIN_INSN_TO_MEM_RATIO)
+  if (insn_to_mem_ratio < param_prefetch_min_insn_to_mem_ratio)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Not prefetching -- instruction to memory reference ratio (%d) too small\n",
 		 insn_to_mem_ratio);
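The ratio above is a coarse density check: with illustrative numbers, a loop body of 40 insns containing 10 memory references has insn_to_mem_ratio = 40 / 10 = 4, and prefetching is abandoned only if that value falls below param_prefetch_min_insn_to_mem_ratio on the target.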
@@ -1860,11 +1861,11 @@
      loop will usually be significantly smaller than the number of insns of the
      original loop * unroll_factor (at least the induction variable increases
      and the exit branches will get eliminated), so it might be better to use
      tree_estimate_loop_size + estimated_unrolled_size. */
   insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count;
-  if (insn_to_prefetch_ratio < MIN_INSN_TO_PREFETCH_RATIO)
+  if (insn_to_prefetch_ratio < param_min_insn_to_prefetch_ratio)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Not prefetching -- instruction to prefetch ratio (%d) too small\n",
 		 insn_to_prefetch_ratio);
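The second ratio estimates how much non-prefetch work remains per issued prefetch after unrolling: for example (illustrative values), unroll_factor = 4, ninsns = 20 and prefetch_count = 10 give (4 * 20) / 10 = 8 instructions per prefetch, which is then compared against param_min_insn_to_prefetch_ratio.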
@@ -1877,16 +1878,16 @@
 
 /* Issue prefetch instructions for array references in LOOP.  Returns
    true if the LOOP was unrolled. */
 
 static bool
-loop_prefetch_arrays (struct loop *loop)
+loop_prefetch_arrays (class loop *loop)
 {
   struct mem_ref_group *refs;
   unsigned ahead, ninsns, time, unroll_factor;
   HOST_WIDE_INT est_niter;
-  struct tree_niter_desc desc;
+  class tree_niter_desc desc;
   bool unrolled = false, no_other_refs;
   unsigned prefetch_count;
   unsigned mem_ref_count;
 
   if (optimize_loop_nest_for_size_p (loop))
@@ -1900,11 +1901,11 @@
      the loop body. */
   time = tree_num_loop_insns (loop, &eni_time_weights);
   if (time == 0)
     return false;
 
-  ahead = (PREFETCH_LATENCY + time - 1) / time;
+  ahead = (param_prefetch_latency + time - 1) / time;
   est_niter = estimated_stmt_executions_int (loop);
   if (est_niter == -1)
     est_niter = likely_max_stmt_executions_int (loop);
 
   /* Prefetching is not likely to be profitable if the trip count to ahead
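The expression above is a ceiling division: ahead is the smallest number of loop iterations whose estimated cost covers the prefetch latency. For example, with illustrative values in the same cost units, a latency of 200 and a body costing time = 30 give ahead = (200 + 30 - 1) / 30 = 7 iterations.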
@@ -1980,11 +1981,11 @@
 /* Issue prefetch instructions for array references in loops. */
 
 unsigned int
 tree_ssa_prefetch_arrays (void)
 {
-  struct loop *loop;
+  class loop *loop;
   bool unrolled = false;
   int todo_flags = 0;
 
   if (!targetm.have_prefetch ()
       /* It is possible to ask compiler for say -mtune=i486 -march=pentium4.
@@ -1996,21 +1997,23 @@
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Prefetching parameters:\n");
       fprintf (dump_file, " simultaneous prefetches: %d\n",
-	       SIMULTANEOUS_PREFETCHES);
-      fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY);
+	       param_simultaneous_prefetches);
+      fprintf (dump_file, " prefetch latency: %d\n", param_prefetch_latency);
       fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK);
       fprintf (dump_file, " L1 cache size: %d lines, %d kB\n",
-	       L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE);
-      fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
-      fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE);
+	       L1_CACHE_SIZE_BYTES / param_l1_cache_line_size,
+	       param_l1_cache_size);
+      fprintf (dump_file, " L1 cache line size: %d\n",
+	       param_l1_cache_line_size);
+      fprintf (dump_file, " L2 cache size: %d kB\n", param_l2_cache_size);
       fprintf (dump_file, " min insn-to-prefetch ratio: %d \n",
-	       MIN_INSN_TO_PREFETCH_RATIO);
+	       param_min_insn_to_prefetch_ratio);
       fprintf (dump_file, " min insn-to-mem ratio: %d \n",
-	       PREFETCH_MIN_INSN_TO_MEM_RATIO);
+	       param_prefetch_min_insn_to_mem_ratio);
       fprintf (dump_file, "\n");
     }
 
   initialize_original_copy_tables ();
 