Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-ssa-loop-prefetch.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Array prefetching. | 1 /* Array prefetching. |
2 Copyright (C) 2005-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2005-2020 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | 7 under the terms of the GNU General Public License as published by the |
41 #include "tree-ssa-loop.h" | 41 #include "tree-ssa-loop.h" |
42 #include "ssa.h" | 42 #include "ssa.h" |
43 #include "tree-into-ssa.h" | 43 #include "tree-into-ssa.h" |
44 #include "cfgloop.h" | 44 #include "cfgloop.h" |
45 #include "tree-scalar-evolution.h" | 45 #include "tree-scalar-evolution.h" |
46 #include "params.h" | |
47 #include "langhooks.h" | 46 #include "langhooks.h" |
48 #include "tree-inline.h" | 47 #include "tree-inline.h" |
49 #include "tree-data-ref.h" | 48 #include "tree-data-ref.h" |
50 #include "diagnostic-core.h" | 49 #include "diagnostic-core.h" |
51 #include "dbgcnt.h" | 50 #include "dbgcnt.h" |
165 /* The size of the block loaded by a single prefetch. Usually, this is | 164 /* The size of the block loaded by a single prefetch. Usually, this is |
166 the same as cache line size (at the moment, we only consider one level | 165 the same as cache line size (at the moment, we only consider one level |
167 of cache hierarchy). */ | 166 of cache hierarchy). */ |
168 | 167 |
169 #ifndef PREFETCH_BLOCK | 168 #ifndef PREFETCH_BLOCK |
170 #define PREFETCH_BLOCK L1_CACHE_LINE_SIZE | 169 #define PREFETCH_BLOCK param_l1_cache_line_size |
171 #endif | 170 #endif |
172 | 171 |
173 /* Do we have a forward hardware sequential prefetching? */ | 172 /* Do we have a forward hardware sequential prefetching? */ |
174 | 173 |
175 #ifndef HAVE_FORWARD_PREFETCH | 174 #ifndef HAVE_FORWARD_PREFETCH |
189 | 188 |
190 #ifndef ACCEPTABLE_MISS_RATE | 189 #ifndef ACCEPTABLE_MISS_RATE |
191 #define ACCEPTABLE_MISS_RATE 50 | 190 #define ACCEPTABLE_MISS_RATE 50 |
192 #endif | 191 #endif |
193 | 192 |
194 #define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024)) | 193 #define L1_CACHE_SIZE_BYTES ((unsigned) (param_l1_cache_size * 1024)) |
195 #define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024)) | 194 #define L2_CACHE_SIZE_BYTES ((unsigned) (param_l2_cache_size * 1024)) |
196 | 195 |
197 /* We consider a memory access nontemporal if it is not reused sooner than | 196 /* We consider a memory access nontemporal if it is not reused sooner than |
198 after L2_CACHE_SIZE_BYTES of memory are accessed. However, we ignore | 197 after L2_CACHE_SIZE_BYTES of memory are accessed. However, we ignore |
199 accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION, | 198 accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION, |
200 so that we use nontemporal prefetches e.g. if single memory location | 199 so that we use nontemporal prefetches e.g. if single memory location |
419 | 418 |
420 /* A structure used to pass arguments to idx_analyze_ref. */ | 419 /* A structure used to pass arguments to idx_analyze_ref. */ |
421 | 420 |
422 struct ar_data | 421 struct ar_data |
423 { | 422 { |
424 struct loop *loop; /* Loop of the reference. */ | 423 class loop *loop; /* Loop of the reference. */ |
425 gimple *stmt; /* Statement of the reference. */ | 424 gimple *stmt; /* Statement of the reference. */ |
426 tree *step; /* Step of the memory reference. */ | 425 tree *step; /* Step of the memory reference. */ |
427 HOST_WIDE_INT *delta; /* Offset of the memory reference. */ | 426 HOST_WIDE_INT *delta; /* Offset of the memory reference. */ |
428 }; | 427 }; |
429 | 428 |
484 STEP are integer constants and iter is number of iterations of LOOP. The | 483 STEP are integer constants and iter is number of iterations of LOOP. The |
485 reference occurs in statement STMT. Strips nonaddressable component | 484 reference occurs in statement STMT. Strips nonaddressable component |
486 references from REF_P. */ | 485 references from REF_P. */ |
487 | 486 |
488 static bool | 487 static bool |
489 analyze_ref (struct loop *loop, tree *ref_p, tree *base, | 488 analyze_ref (class loop *loop, tree *ref_p, tree *base, |
490 tree *step, HOST_WIDE_INT *delta, | 489 tree *step, HOST_WIDE_INT *delta, |
491 gimple *stmt) | 490 gimple *stmt) |
492 { | 491 { |
493 struct ar_data ar_data; | 492 struct ar_data ar_data; |
494 tree off; | 493 tree off; |
533 /* Record a memory reference REF to the list REFS. The reference occurs in | 532 /* Record a memory reference REF to the list REFS. The reference occurs in |
534 LOOP in statement STMT and it is a write if WRITE_P. Returns true if the | 533 LOOP in statement STMT and it is a write if WRITE_P. Returns true if the |
535 reference was recorded, false otherwise. */ | 534 reference was recorded, false otherwise. */ |
536 | 535 |
537 static bool | 536 static bool |
538 gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, | 537 gather_memory_references_ref (class loop *loop, struct mem_ref_group **refs, |
539 tree ref, bool write_p, gimple *stmt) | 538 tree ref, bool write_p, gimple *stmt) |
540 { | 539 { |
541 tree base, step; | 540 tree base, step; |
542 HOST_WIDE_INT delta; | 541 HOST_WIDE_INT delta; |
543 struct mem_ref_group *agrp; | 542 struct mem_ref_group *agrp; |
604 | 603 |
605 /* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to | 604 /* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to |
606 true if there are no other memory references inside the loop. */ | 605 true if there are no other memory references inside the loop. */ |
607 | 606 |
608 static struct mem_ref_group * | 607 static struct mem_ref_group * |
609 gather_memory_references (struct loop *loop, bool *no_other_refs, unsigned *ref_count) | 608 gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_count) |
610 { | 609 { |
611 basic_block *body = get_loop_body_in_dom_order (loop); | 610 basic_block *body = get_loop_body_in_dom_order (loop); |
612 basic_block bb; | 611 basic_block bb; |
613 unsigned i; | 612 unsigned i; |
614 gimple_stmt_iterator bsi; | 613 gimple_stmt_iterator bsi; |
991 | 990 |
992 static bool | 991 static bool |
993 should_issue_prefetch_p (struct mem_ref *ref) | 992 should_issue_prefetch_p (struct mem_ref *ref) |
994 { | 993 { |
995 /* Do we want to issue prefetches for non-constant strides? */ | 994 /* Do we want to issue prefetches for non-constant strides? */ |
996 if (!cst_and_fits_in_hwi (ref->group->step) && PREFETCH_DYNAMIC_STRIDES == 0) | 995 if (!cst_and_fits_in_hwi (ref->group->step) |
996 && param_prefetch_dynamic_strides == 0) | |
997 { | 997 { |
998 if (dump_file && (dump_flags & TDF_DETAILS)) | 998 if (dump_file && (dump_flags & TDF_DETAILS)) |
999 fprintf (dump_file, | 999 fprintf (dump_file, |
1000 "Skipping non-constant step for reference %u:%u\n", | 1000 "Skipping non-constant step for reference %u:%u\n", |
1001 ref->group->uid, ref->uid); | 1001 ref->group->uid, ref->uid); |
1006 prefetch hints for a range of strides. Make sure we don't issue | 1006 prefetch hints for a range of strides. Make sure we don't issue |
1007 prefetches for such cases if the stride is within this particular | 1007 prefetches for such cases if the stride is within this particular |
1008 range. */ | 1008 range. */ |
1009 if (cst_and_fits_in_hwi (ref->group->step) | 1009 if (cst_and_fits_in_hwi (ref->group->step) |
1010 && abs_hwi (int_cst_value (ref->group->step)) | 1010 && abs_hwi (int_cst_value (ref->group->step)) |
1011 < (HOST_WIDE_INT) PREFETCH_MINIMUM_STRIDE) | 1011 < (HOST_WIDE_INT) param_prefetch_minimum_stride) |
1012 { | 1012 { |
1013 if (dump_file && (dump_flags & TDF_DETAILS)) | 1013 if (dump_file && (dump_flags & TDF_DETAILS)) |
1014 fprintf (dump_file, | 1014 fprintf (dump_file, |
1015 "Step for reference %u:%u (" HOST_WIDE_INT_PRINT_DEC | 1015 "Step for reference %u:%u (" HOST_WIDE_INT_PRINT_DEC |
1016 ") is less than the mininum required stride of %d\n", | 1016 ") is less than the mininum required stride of %d\n", |
1017 ref->group->uid, ref->uid, int_cst_value (ref->group->step), | 1017 ref->group->uid, ref->uid, int_cst_value (ref->group->step), |
1018 PREFETCH_MINIMUM_STRIDE); | 1018 param_prefetch_minimum_stride); |
1019 return false; | 1019 return false; |
1020 } | 1020 } |
1021 | 1021 |
1022 /* For now do not issue prefetches for only first few of the | 1022 /* For now do not issue prefetches for only first few of the |
1023 iterations. */ | 1023 iterations. */ |
1053 unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots; | 1053 unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots; |
1054 unsigned slots_per_prefetch; | 1054 unsigned slots_per_prefetch; |
1055 struct mem_ref *ref; | 1055 struct mem_ref *ref; |
1056 bool any = false; | 1056 bool any = false; |
1057 | 1057 |
1058 /* At most SIMULTANEOUS_PREFETCHES should be running at the same time. */ | 1058 /* At most param_simultaneous_prefetches should be running |
1059 remaining_prefetch_slots = SIMULTANEOUS_PREFETCHES; | 1059 at the same time. */ |
1060 remaining_prefetch_slots = param_simultaneous_prefetches; | |
1060 | 1061 |
1061 /* The prefetch will run for AHEAD iterations of the original loop, i.e., | 1062 /* The prefetch will run for AHEAD iterations of the original loop, i.e., |
1062 AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration, | 1063 AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration, |
1063 it will need a prefetch slot. */ | 1064 it will need a prefetch slot. */ |
1064 slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor; | 1065 slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor; |
1284 } | 1285 } |
1285 | 1286 |
1286 /* Issue a memory fence instruction after LOOP. */ | 1287 /* Issue a memory fence instruction after LOOP. */ |
1287 | 1288 |
1288 static void | 1289 static void |
1289 emit_mfence_after_loop (struct loop *loop) | 1290 emit_mfence_after_loop (class loop *loop) |
1290 { | 1291 { |
1291 vec<edge> exits = get_loop_exit_edges (loop); | 1292 vec<edge> exits = get_loop_exit_edges (loop); |
1292 edge exit; | 1293 edge exit; |
1293 gcall *call; | 1294 gcall *call; |
1294 gimple_stmt_iterator bsi; | 1295 gimple_stmt_iterator bsi; |
1313 } | 1314 } |
1314 | 1315 |
1315 /* Returns true if we can use storent in loop, false otherwise. */ | 1316 /* Returns true if we can use storent in loop, false otherwise. */ |
1316 | 1317 |
1317 static bool | 1318 static bool |
1318 may_use_storent_in_loop_p (struct loop *loop) | 1319 may_use_storent_in_loop_p (class loop *loop) |
1319 { | 1320 { |
1320 bool ret = true; | 1321 bool ret = true; |
1321 | 1322 |
1322 if (loop->inner != NULL) | 1323 if (loop->inner != NULL) |
1323 return false; | 1324 return false; |
1343 | 1344 |
1344 /* Marks nontemporal stores in LOOP. GROUPS contains the description of memory | 1345 /* Marks nontemporal stores in LOOP. GROUPS contains the description of memory |
1345 references in the loop. */ | 1346 references in the loop. */ |
1346 | 1347 |
1347 static void | 1348 static void |
1348 mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups) | 1349 mark_nontemporal_stores (class loop *loop, struct mem_ref_group *groups) |
1349 { | 1350 { |
1350 struct mem_ref *ref; | 1351 struct mem_ref *ref; |
1351 bool any = false; | 1352 bool any = false; |
1352 | 1353 |
1353 if (!may_use_storent_in_loop_p (loop)) | 1354 if (!may_use_storent_in_loop_p (loop)) |
1364 /* Determines whether we can profitably unroll LOOP FACTOR times, and if | 1365 /* Determines whether we can profitably unroll LOOP FACTOR times, and if |
1365 this is the case, fill in DESC by the description of number of | 1366 this is the case, fill in DESC by the description of number of |
1366 iterations. */ | 1367 iterations. */ |
1367 | 1368 |
1368 static bool | 1369 static bool |
1369 should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc, | 1370 should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc, |
1370 unsigned factor) | 1371 unsigned factor) |
1371 { | 1372 { |
1372 if (!can_unroll_loop_p (loop, factor, desc)) | 1373 if (!can_unroll_loop_p (loop, factor, desc)) |
1373 return false; | 1374 return false; |
1374 | 1375 |
1388 number of iterations of LOOP is stored to DESC. NINSNS is the number of | 1389 number of iterations of LOOP is stored to DESC. NINSNS is the number of |
1389 insns of the LOOP. EST_NITER is the estimated number of iterations of | 1390 insns of the LOOP. EST_NITER is the estimated number of iterations of |
1390 the loop, or -1 if no estimate is available. */ | 1391 the loop, or -1 if no estimate is available. */ |
1391 | 1392 |
1392 static unsigned | 1393 static unsigned |
1393 determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs, | 1394 determine_unroll_factor (class loop *loop, struct mem_ref_group *refs, |
1394 unsigned ninsns, struct tree_niter_desc *desc, | 1395 unsigned ninsns, class tree_niter_desc *desc, |
1395 HOST_WIDE_INT est_niter) | 1396 HOST_WIDE_INT est_niter) |
1396 { | 1397 { |
1397 unsigned upper_bound; | 1398 unsigned upper_bound; |
1398 unsigned nfactor, factor, mod_constraint; | 1399 unsigned nfactor, factor, mod_constraint; |
1399 struct mem_ref_group *agp; | 1400 struct mem_ref_group *agp; |
1404 from unrolling them enough to make exactly one cache line covered by each | 1405 from unrolling them enough to make exactly one cache line covered by each |
1405 iteration. Also, the goal of PARAM_MAX_UNROLL_TIMES is to prevent | 1406 iteration. Also, the goal of PARAM_MAX_UNROLL_TIMES is to prevent |
1406 us from unrolling the loops too many times in cases where we only expect | 1407 us from unrolling the loops too many times in cases where we only expect |
1407 gains from better scheduling and decreasing loop overhead, which is not | 1408 gains from better scheduling and decreasing loop overhead, which is not |
1408 the case here. */ | 1409 the case here. */ |
1409 upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns; | 1410 upper_bound = param_max_unrolled_insns / ninsns; |
1410 | 1411 |
1411 /* If we unrolled the loop more times than it iterates, the unrolled version | 1412 /* If we unrolled the loop more times than it iterates, the unrolled version |
1412 of the loop would never be entered. */ | 1413 of the loop would never be entered. */ |
1413 if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound) | 1414 if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound) |
1414 upper_bound = est_niter; | 1415 upper_bound = est_niter; |
1457 /* If several iterations access the same cache line, use the size of | 1458 /* If several iterations access the same cache line, use the size of |
1458 the line divided by this number. Otherwise, a cache line is | 1459 the line divided by this number. Otherwise, a cache line is |
1459 accessed in each iteration. TODO -- in the latter case, we should | 1460 accessed in each iteration. TODO -- in the latter case, we should |
1460 take the size of the reference into account, rounding it up on cache | 1461 take the size of the reference into account, rounding it up on cache |
1461 line size multiple. */ | 1462 line size multiple. */ |
1462 volume += L1_CACHE_LINE_SIZE / ref->prefetch_mod; | 1463 volume += param_l1_cache_line_size / ref->prefetch_mod; |
1463 } | 1464 } |
1464 return volume; | 1465 return volume; |
1465 } | 1466 } |
1466 | 1467 |
1467 /* Returns the volume of memory references accessed across VEC iterations of | 1468 /* Returns the volume of memory references accessed across VEC iterations of |
1491 at the position corresponding to the loop of the step. N is the depth | 1492 at the position corresponding to the loop of the step. N is the depth |
1492 of the considered loop nest, and LOOP is its innermost loop. */ | 1493 of the considered loop nest, and LOOP is its innermost loop. */ |
1493 | 1494 |
1494 static void | 1495 static void |
1495 add_subscript_strides (tree access_fn, unsigned stride, | 1496 add_subscript_strides (tree access_fn, unsigned stride, |
1496 HOST_WIDE_INT *strides, unsigned n, struct loop *loop) | 1497 HOST_WIDE_INT *strides, unsigned n, class loop *loop) |
1497 { | 1498 { |
1498 struct loop *aloop; | 1499 class loop *aloop; |
1499 tree step; | 1500 tree step; |
1500 HOST_WIDE_INT astep; | 1501 HOST_WIDE_INT astep; |
1501 unsigned min_depth = loop_depth (loop) - n; | 1502 unsigned min_depth = loop_depth (loop) - n; |
1502 | 1503 |
1503 while (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) | 1504 while (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) |
1510 continue; | 1511 continue; |
1511 | 1512 |
1512 if (tree_fits_shwi_p (step)) | 1513 if (tree_fits_shwi_p (step)) |
1513 astep = tree_to_shwi (step); | 1514 astep = tree_to_shwi (step); |
1514 else | 1515 else |
1515 astep = L1_CACHE_LINE_SIZE; | 1516 astep = param_l1_cache_line_size; |
1516 | 1517 |
1517 strides[n - 1 - loop_depth (loop) + loop_depth (aloop)] += astep * stride; | 1518 strides[n - 1 - loop_depth (loop) + loop_depth (aloop)] += astep * stride; |
1518 | 1519 |
1519 } | 1520 } |
1520 } | 1521 } |
1524 loops, and LOOP_SIZES contains the volumes of accesses in each of the | 1525 loops, and LOOP_SIZES contains the volumes of accesses in each of the |
1525 loops. LOOP is the innermost loop of the current loop nest. */ | 1526 loops. LOOP is the innermost loop of the current loop nest. */ |
1526 | 1527 |
1527 static unsigned | 1528 static unsigned |
1528 self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n, | 1529 self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n, |
1529 struct loop *loop) | 1530 class loop *loop) |
1530 { | 1531 { |
1531 tree stride, access_fn; | 1532 tree stride, access_fn; |
1532 HOST_WIDE_INT *strides, astride; | 1533 HOST_WIDE_INT *strides, astride; |
1533 vec<tree> access_fns; | 1534 vec<tree> access_fns; |
1534 tree ref = DR_REF (dr); | 1535 tree ref = DR_REF (dr); |
1560 { | 1561 { |
1561 stride = TYPE_SIZE_UNIT (TREE_TYPE (ref)); | 1562 stride = TYPE_SIZE_UNIT (TREE_TYPE (ref)); |
1562 if (tree_fits_uhwi_p (stride)) | 1563 if (tree_fits_uhwi_p (stride)) |
1563 astride = tree_to_uhwi (stride); | 1564 astride = tree_to_uhwi (stride); |
1564 else | 1565 else |
1565 astride = L1_CACHE_LINE_SIZE; | 1566 astride = param_l1_cache_line_size; |
1566 | 1567 |
1567 ref = TREE_OPERAND (ref, 0); | 1568 ref = TREE_OPERAND (ref, 0); |
1568 } | 1569 } |
1569 else | 1570 else |
1570 astride = 1; | 1571 astride = 1; |
1576 { | 1577 { |
1577 unsigned HOST_WIDE_INT s; | 1578 unsigned HOST_WIDE_INT s; |
1578 | 1579 |
1579 s = strides[i] < 0 ? -strides[i] : strides[i]; | 1580 s = strides[i] < 0 ? -strides[i] : strides[i]; |
1580 | 1581 |
1581 if (s < (unsigned) L1_CACHE_LINE_SIZE | 1582 if (s < (unsigned) param_l1_cache_line_size |
1582 && (loop_sizes[i] | 1583 && (loop_sizes[i] |
1583 > (unsigned) (L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION))) | 1584 > (unsigned) (L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION))) |
1584 { | 1585 { |
1585 ret = loop_sizes[i]; | 1586 ret = loop_sizes[i]; |
1586 break; | 1587 break; |
1594 /* Determines the distance till the first reuse of each reference in REFS | 1595 /* Determines the distance till the first reuse of each reference in REFS |
1595 in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other | 1596 in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other |
1596 memory references in the loop. Return false if the analysis fails. */ | 1597 memory references in the loop. Return false if the analysis fails. */ |
1597 | 1598 |
1598 static bool | 1599 static bool |
1599 determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, | 1600 determine_loop_nest_reuse (class loop *loop, struct mem_ref_group *refs, |
1600 bool no_other_refs) | 1601 bool no_other_refs) |
1601 { | 1602 { |
1602 struct loop *nest, *aloop; | 1603 class loop *nest, *aloop; |
1603 vec<data_reference_p> datarefs = vNULL; | 1604 vec<data_reference_p> datarefs = vNULL; |
1604 vec<ddr_p> dependences = vNULL; | 1605 vec<ddr_p> dependences = vNULL; |
1605 struct mem_ref_group *gr; | 1606 struct mem_ref_group *gr; |
1606 struct mem_ref *ref, *refb; | 1607 struct mem_ref *ref, *refb; |
1607 auto_vec<loop_p> vloops; | 1608 auto_vec<loop_p> vloops; |
1823 TODO: Implement a more precise computation by estimating the time | 1824 TODO: Implement a more precise computation by estimating the time |
1824 for each CPU or memory op in the loop. Time estimates for memory ops | 1825 for each CPU or memory op in the loop. Time estimates for memory ops |
1825 should account for cache misses. */ | 1826 should account for cache misses. */ |
1826 insn_to_mem_ratio = ninsns / mem_ref_count; | 1827 insn_to_mem_ratio = ninsns / mem_ref_count; |
1827 | 1828 |
1828 if (insn_to_mem_ratio < PREFETCH_MIN_INSN_TO_MEM_RATIO) | 1829 if (insn_to_mem_ratio < param_prefetch_min_insn_to_mem_ratio) |
1829 { | 1830 { |
1830 if (dump_file && (dump_flags & TDF_DETAILS)) | 1831 if (dump_file && (dump_flags & TDF_DETAILS)) |
1831 fprintf (dump_file, | 1832 fprintf (dump_file, |
1832 "Not prefetching -- instruction to memory reference ratio (%d) too small\n", | 1833 "Not prefetching -- instruction to memory reference ratio (%d) too small\n", |
1833 insn_to_mem_ratio); | 1834 insn_to_mem_ratio); |
1860 loop will usually be significantly smaller than the number of insns of the | 1861 loop will usually be significantly smaller than the number of insns of the |
1861 original loop * unroll_factor (at least the induction variable increases | 1862 original loop * unroll_factor (at least the induction variable increases |
1862 and the exit branches will get eliminated), so it might be better to use | 1863 and the exit branches will get eliminated), so it might be better to use |
1863 tree_estimate_loop_size + estimated_unrolled_size. */ | 1864 tree_estimate_loop_size + estimated_unrolled_size. */ |
1864 insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count; | 1865 insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count; |
1865 if (insn_to_prefetch_ratio < MIN_INSN_TO_PREFETCH_RATIO) | 1866 if (insn_to_prefetch_ratio < param_min_insn_to_prefetch_ratio) |
1866 { | 1867 { |
1867 if (dump_file && (dump_flags & TDF_DETAILS)) | 1868 if (dump_file && (dump_flags & TDF_DETAILS)) |
1868 fprintf (dump_file, | 1869 fprintf (dump_file, |
1869 "Not prefetching -- instruction to prefetch ratio (%d) too small\n", | 1870 "Not prefetching -- instruction to prefetch ratio (%d) too small\n", |
1870 insn_to_prefetch_ratio); | 1871 insn_to_prefetch_ratio); |
1877 | 1878 |
1878 /* Issue prefetch instructions for array references in LOOP. Returns | 1879 /* Issue prefetch instructions for array references in LOOP. Returns |
1879 true if the LOOP was unrolled. */ | 1880 true if the LOOP was unrolled. */ |
1880 | 1881 |
1881 static bool | 1882 static bool |
1882 loop_prefetch_arrays (struct loop *loop) | 1883 loop_prefetch_arrays (class loop *loop) |
1883 { | 1884 { |
1884 struct mem_ref_group *refs; | 1885 struct mem_ref_group *refs; |
1885 unsigned ahead, ninsns, time, unroll_factor; | 1886 unsigned ahead, ninsns, time, unroll_factor; |
1886 HOST_WIDE_INT est_niter; | 1887 HOST_WIDE_INT est_niter; |
1887 struct tree_niter_desc desc; | 1888 class tree_niter_desc desc; |
1888 bool unrolled = false, no_other_refs; | 1889 bool unrolled = false, no_other_refs; |
1889 unsigned prefetch_count; | 1890 unsigned prefetch_count; |
1890 unsigned mem_ref_count; | 1891 unsigned mem_ref_count; |
1891 | 1892 |
1892 if (optimize_loop_nest_for_size_p (loop)) | 1893 if (optimize_loop_nest_for_size_p (loop)) |
1900 the loop body. */ | 1901 the loop body. */ |
1901 time = tree_num_loop_insns (loop, &eni_time_weights); | 1902 time = tree_num_loop_insns (loop, &eni_time_weights); |
1902 if (time == 0) | 1903 if (time == 0) |
1903 return false; | 1904 return false; |
1904 | 1905 |
1905 ahead = (PREFETCH_LATENCY + time - 1) / time; | 1906 ahead = (param_prefetch_latency + time - 1) / time; |
1906 est_niter = estimated_stmt_executions_int (loop); | 1907 est_niter = estimated_stmt_executions_int (loop); |
1907 if (est_niter == -1) | 1908 if (est_niter == -1) |
1908 est_niter = likely_max_stmt_executions_int (loop); | 1909 est_niter = likely_max_stmt_executions_int (loop); |
1909 | 1910 |
1910 /* Prefetching is not likely to be profitable if the trip count to ahead | 1911 /* Prefetching is not likely to be profitable if the trip count to ahead |
1980 /* Issue prefetch instructions for array references in loops. */ | 1981 /* Issue prefetch instructions for array references in loops. */ |
1981 | 1982 |
1982 unsigned int | 1983 unsigned int |
1983 tree_ssa_prefetch_arrays (void) | 1984 tree_ssa_prefetch_arrays (void) |
1984 { | 1985 { |
1985 struct loop *loop; | 1986 class loop *loop; |
1986 bool unrolled = false; | 1987 bool unrolled = false; |
1987 int todo_flags = 0; | 1988 int todo_flags = 0; |
1988 | 1989 |
1989 if (!targetm.have_prefetch () | 1990 if (!targetm.have_prefetch () |
1990 /* It is possible to ask compiler for say -mtune=i486 -march=pentium4. | 1991 /* It is possible to ask compiler for say -mtune=i486 -march=pentium4. |
1996 | 1997 |
1997 if (dump_file && (dump_flags & TDF_DETAILS)) | 1998 if (dump_file && (dump_flags & TDF_DETAILS)) |
1998 { | 1999 { |
1999 fprintf (dump_file, "Prefetching parameters:\n"); | 2000 fprintf (dump_file, "Prefetching parameters:\n"); |
2000 fprintf (dump_file, " simultaneous prefetches: %d\n", | 2001 fprintf (dump_file, " simultaneous prefetches: %d\n", |
2001 SIMULTANEOUS_PREFETCHES); | 2002 param_simultaneous_prefetches); |
2002 fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY); | 2003 fprintf (dump_file, " prefetch latency: %d\n", param_prefetch_latency); |
2003 fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK); | 2004 fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK); |
2004 fprintf (dump_file, " L1 cache size: %d lines, %d kB\n", | 2005 fprintf (dump_file, " L1 cache size: %d lines, %d kB\n", |
2005 L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE); | 2006 L1_CACHE_SIZE_BYTES / param_l1_cache_line_size, |
2006 fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE); | 2007 param_l1_cache_size); |
2007 fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE); | 2008 fprintf (dump_file, " L1 cache line size: %d\n", |
2009 param_l1_cache_line_size); | |
2010 fprintf (dump_file, " L2 cache size: %d kB\n", param_l2_cache_size); | |
2008 fprintf (dump_file, " min insn-to-prefetch ratio: %d \n", | 2011 fprintf (dump_file, " min insn-to-prefetch ratio: %d \n", |
2009 MIN_INSN_TO_PREFETCH_RATIO); | 2012 param_min_insn_to_prefetch_ratio); |
2010 fprintf (dump_file, " min insn-to-mem ratio: %d \n", | 2013 fprintf (dump_file, " min insn-to-mem ratio: %d \n", |
2011 PREFETCH_MIN_INSN_TO_MEM_RATIO); | 2014 param_prefetch_min_insn_to_mem_ratio); |
2012 fprintf (dump_file, "\n"); | 2015 fprintf (dump_file, "\n"); |
2013 } | 2016 } |
2014 | 2017 |
2015 initialize_original_copy_tables (); | 2018 initialize_original_copy_tables (); |
2016 | 2019 |