Mercurial > hg > CbC > CbC_gcc
diff gcc/tree-ssa-loop-prefetch.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/tree-ssa-loop-prefetch.c Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/tree-ssa-loop-prefetch.c Fri Oct 27 22:46:09 2017 +0900 @@ -1,5 +1,5 @@ /* Array prefetching. - Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2005-2017 Free Software Foundation, Inc. This file is part of GCC. @@ -20,33 +20,35 @@ #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" #include "tree.h" -#include "tm_p.h" -#include "basic-block.h" -#include "output.h" +#include "gimple.h" +#include "predict.h" +#include "tree-pass.h" +#include "gimple-ssa.h" +#include "optabs-query.h" #include "tree-pretty-print.h" -#include "tree-flow.h" -#include "tree-dump.h" -#include "timevar.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "tree-ssa-loop-ivopts.h" +#include "tree-ssa-loop-manip.h" +#include "tree-ssa-loop-niter.h" +#include "tree-ssa-loop.h" +#include "ssa.h" +#include "tree-into-ssa.h" #include "cfgloop.h" -#include "tree-pass.h" -#include "insn-config.h" -#include "recog.h" -#include "hashtab.h" -#include "tree-chrec.h" #include "tree-scalar-evolution.h" -#include "diagnostic-core.h" #include "params.h" #include "langhooks.h" #include "tree-inline.h" #include "tree-data-ref.h" - - -/* FIXME: Needed for optabs, but this should all be moved to a TBD interface - between the GIMPLE and RTL worlds. */ -#include "expr.h" -#include "optabs.h" +#include "diagnostic-core.h" +#include "dbgcnt.h" /* This pass inserts prefetch instructions to optimize cache usage during accesses to arrays in loops. It processes loops sequentially and: @@ -189,10 +191,6 @@ #define ACCEPTABLE_MISS_RATE 50 #endif -#ifndef HAVE_prefetch -#define HAVE_prefetch 0 -#endif - #define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024)) #define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024)) @@ -230,11 +228,12 @@ tree step; /* Step of the reference. */ struct mem_ref *refs; /* References in the group. */ struct mem_ref_group *next; /* Next group of references. */ + unsigned int uid; /* Group UID, used only for debugging. */ }; /* Assigned to PREFETCH_BEFORE when all iterations are to be prefetched. */ -#define PREFETCH_ALL (~(unsigned HOST_WIDE_INT) 0) +#define PREFETCH_ALL HOST_WIDE_INT_M1U /* Do not generate a prefetch if the unroll factor is significantly less than what is required by the prefetch. This is to avoid redundant @@ -259,7 +258,7 @@ struct mem_ref { - gimple stmt; /* Statement in that the reference appears. */ + gimple *stmt; /* Statement in that the reference appears. */ tree mem; /* The reference. */ HOST_WIDE_INT delta; /* Constant offset of the reference. */ struct mem_ref_group *group; /* The group of references it belongs to. */ @@ -272,6 +271,7 @@ unsigned reuse_distance; /* The amount of data accessed before the first reuse of this value. */ struct mem_ref *next; /* The next reference in the group. */ + unsigned int uid; /* Ref UID, used only for debugging. */ unsigned write_p : 1; /* Is it a write? */ unsigned independent_p : 1; /* True if the reference is independent on all other references inside the loop. */ @@ -280,29 +280,31 @@ nontemporal one. */ }; +/* Dumps information about memory reference */ +static void +dump_mem_details (FILE *file, tree base, tree step, + HOST_WIDE_INT delta, bool write_p) +{ + fprintf (file, "(base "); + print_generic_expr (file, base, TDF_SLIM); + fprintf (file, ", step "); + if (cst_and_fits_in_hwi (step)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (step)); + else + print_generic_expr (file, step, TDF_SLIM); + fprintf (file, ")\n"); + fprintf (file, " delta " HOST_WIDE_INT_PRINT_DEC "\n", delta); + fprintf (file, " %s\n\n", write_p ? "write" : "read"); +} + /* Dumps information about reference REF to FILE. */ static void dump_mem_ref (FILE *file, struct mem_ref *ref) { - fprintf (file, "Reference %p:\n", (void *) ref); - - fprintf (file, " group %p (base ", (void *) ref->group); - print_generic_expr (file, ref->group->base, TDF_SLIM); - fprintf (file, ", step "); - if (cst_and_fits_in_hwi (ref->group->step)) - fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (ref->group->step)); - else - print_generic_expr (file, ref->group->step, TDF_TREE); + fprintf (file, "reference %u:%u (", ref->group->uid, ref->uid); + print_generic_expr (file, ref->mem, TDF_SLIM); fprintf (file, ")\n"); - - fprintf (file, " delta "); - fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->delta); - fprintf (file, "\n"); - - fprintf (file, " %s\n", ref->write_p ? "write" : "read"); - - fprintf (file, "\n"); } /* Finds a group with BASE and STEP in GROUPS, or creates one if it does not @@ -311,6 +313,9 @@ static struct mem_ref_group * find_or_create_group (struct mem_ref_group **groups, tree base, tree step) { + /* Global count for setting struct mem_ref_group->uid. */ + static unsigned int last_mem_ref_group_uid = 0; + struct mem_ref_group *group; for (; *groups; groups = &(*groups)->next) @@ -321,8 +326,8 @@ /* If step is an integer constant, keep the list of groups sorted by decreasing step. */ - if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step) - && int_cst_value ((*groups)->step) < int_cst_value (step)) + if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step) + && int_cst_value ((*groups)->step) < int_cst_value (step)) break; } @@ -330,6 +335,7 @@ group->base = base; group->step = step; group->refs = NULL; + group->uid = ++last_mem_ref_group_uid; group->next = *groups; *groups = group; @@ -340,14 +346,17 @@ WRITE_P. The reference occurs in statement STMT. */ static void -record_ref (struct mem_ref_group *group, gimple stmt, tree mem, +record_ref (struct mem_ref_group *group, gimple *stmt, tree mem, HOST_WIDE_INT delta, bool write_p) { + unsigned int last_mem_ref_uid = 0; struct mem_ref **aref; /* Do not record the same address twice. */ for (aref = &group->refs; *aref; aref = &(*aref)->next) { + last_mem_ref_uid = (*aref)->uid; + /* It does not have to be possible for write reference to reuse the read prefetch, or vice versa. */ if (!WRITE_CAN_USE_READ_PREFETCH @@ -376,9 +385,16 @@ (*aref)->next = NULL; (*aref)->independent_p = false; (*aref)->storent_p = false; + (*aref)->uid = last_mem_ref_uid + 1; if (dump_file && (dump_flags & TDF_DETAILS)) - dump_mem_ref (dump_file, *aref); + { + dump_mem_ref (dump_file, *aref); + + fprintf (dump_file, " group %u ", group->uid); + dump_mem_details (dump_file, group->base, group->step, delta, + write_p); + } } /* Release memory references in GROUPS. */ @@ -406,7 +422,7 @@ struct ar_data { struct loop *loop; /* Loop of the reference. */ - gimple stmt; /* Statement of the reference. */ + gimple *stmt; /* Statement of the reference. */ tree *step; /* Step of the memory reference. */ HOST_WIDE_INT *delta; /* Offset of the memory reference. */ }; @@ -472,7 +488,7 @@ static bool analyze_ref (struct loop *loop, tree *ref_p, tree *base, tree *step, HOST_WIDE_INT *delta, - gimple stmt) + gimple *stmt) { struct ar_data ar_data; tree off; @@ -520,7 +536,7 @@ static bool gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, - tree ref, bool write_p, gimple stmt) + tree ref, bool write_p, gimple *stmt) { tree base, step; HOST_WIDE_INT delta; @@ -539,9 +555,44 @@ if (may_be_nonaddressable_p (base)) return false; - /* Limit non-constant step prefetching only to the innermost loops. */ - if (!cst_and_fits_in_hwi (step) && loop->inner != NULL) - return false; + /* Limit non-constant step prefetching only to the innermost loops and + only when the step is loop invariant in the entire loop nest. */ + if (!cst_and_fits_in_hwi (step)) + { + if (loop->inner != NULL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Memory expression %p\n",(void *) ref ); + print_generic_expr (dump_file, ref, TDF_SLIM); + fprintf (dump_file,":"); + dump_mem_details (dump_file, base, step, delta, write_p); + fprintf (dump_file, + "Ignoring %p, non-constant step prefetching is " + "limited to inner most loops \n", + (void *) ref); + } + return false; + } + else + { + if (!expr_invariant_in_loop_p (loop_outermost (loop), step)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Memory expression %p\n",(void *) ref ); + print_generic_expr (dump_file, ref, TDF_SLIM); + fprintf (dump_file,":"); + dump_mem_details (dump_file, base, step, delta, write_p); + fprintf (dump_file, + "Not prefetching, ignoring %p due to " + "loop variant step\n", + (void *) ref); + } + return false; + } + } + } /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP are integer constants. */ @@ -561,7 +612,7 @@ basic_block bb; unsigned i; gimple_stmt_iterator bsi; - gimple stmt; + gimple *stmt; tree lhs, rhs; struct mem_ref_group *refs = NULL; @@ -589,6 +640,9 @@ continue; } + if (! gimple_vuse (stmt)) + continue; + lhs = gimple_assign_lhs (stmt); rhs = gimple_assign_rhs1 (stmt); @@ -658,9 +712,9 @@ gcc_assert (by > 0); if (x >= 0) - return x / by; + return x / (HOST_WIDE_INT) by; else - return (x + by - 1) / by; + return (x + (HOST_WIDE_INT) by - 1) / (HOST_WIDE_INT) by; } /* Given a CACHE_LINE_SIZE and two inductive memory references @@ -794,7 +848,7 @@ prefetch_before = (hit_from - delta_r + step - 1) / step; /* Do not reduce prefetch_before if we meet beyond cache size. */ - if (prefetch_before > (unsigned) abs (L2_CACHE_SIZE_BYTES / step)) + if (prefetch_before > absu_hwi (L2_CACHE_SIZE_BYTES / step)) prefetch_before = PREFETCH_ALL; if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; @@ -895,7 +949,7 @@ if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Reference %p:", (void *) ref_pruned); + dump_mem_ref (dump_file, ref_pruned); if (ref_pruned->prefetch_before == PREFETCH_ALL && ref_pruned->prefetch_mod == 1) @@ -943,8 +997,8 @@ if (ref->prefetch_before != PREFETCH_ALL) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Ignoring %p due to prefetch_before\n", - (void *) ref); + fprintf (dump_file, "Ignoring reference %u:%u due to prefetch_before\n", + ref->group->uid, ref->uid); return false; } @@ -952,7 +1006,7 @@ if (ref->storent_p) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Ignoring nontemporal store %p\n", (void *) ref); + fprintf (dump_file, "Ignoring nontemporal store reference %u:%u\n", ref->group->uid, ref->uid); return false; } @@ -1015,7 +1069,14 @@ if (2 * remaining_prefetch_slots < prefetch_slots) continue; + /* Stop prefetching if debug counter is activated. */ + if (!dbg_cnt (prefetch)) + continue; + ref->issue_prefetch_p = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Decided to issue prefetch for reference %u:%u\n", + ref->group->uid, ref->uid); if (remaining_prefetch_slots <= prefetch_slots) return true; @@ -1073,15 +1134,15 @@ { HOST_WIDE_INT delta; tree addr, addr_base, write_p, local, forward; - gimple prefetch; + gcall *prefetch; gimple_stmt_iterator bsi; unsigned n_prefetches, ap; bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Issued%s prefetch for %p.\n", + fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n", nontemporal ? " nontemporal" : "", - (void *) ref); + ref->group->uid, ref->uid); bsi = gsi_for_stmt (ref->stmt); @@ -1100,10 +1161,9 @@ /* Determine the address to prefetch. */ delta = (ahead + ap * ref->prefetch_mod) * int_cst_value (ref->group->step); - addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, - addr_base, size_int (delta)); - addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, - true, GSI_SAME_STMT); + addr = fold_build_pointer_plus_hwi (addr_base, delta); + addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, + NULL, true, GSI_SAME_STMT); } else { @@ -1112,13 +1172,24 @@ forward = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, ref->group->step), fold_convert (sizetype, size_int (ahead))); - addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, addr_base, - forward); + addr = fold_build_pointer_plus (addr_base, forward); addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, true, GSI_SAME_STMT); } + + if (addr_base != addr + && TREE_CODE (addr_base) == SSA_NAME + && TREE_CODE (addr) == SSA_NAME) + { + duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base)); + /* As this isn't a plain copy we have to reset alignment + information. */ + if (SSA_NAME_PTR_INFO (addr)) + mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr)); + } + /* Create the prefetch instruction. */ - prefetch = gimple_build_call (built_in_decls[BUILT_IN_PREFETCH], + prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), 3, addr, write_p, local); gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT); } @@ -1146,7 +1217,7 @@ static bool nontemporal_store_p (struct mem_ref *ref) { - enum machine_mode mode; + machine_mode mode; enum insn_code code; /* REF must be a write that is not reused. We require it to be independent @@ -1176,8 +1247,8 @@ return false; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Marked reference %p as a nontemporal store.\n", - (void *) ref); + fprintf (dump_file, "Marked reference %u:%u as a nontemporal store.\n", + ref->group->uid, ref->uid); gimple_assign_set_nontemporal_move (ref->stmt, true); ref->storent_p = true; @@ -1190,13 +1261,13 @@ static void emit_mfence_after_loop (struct loop *loop) { - VEC (edge, heap) *exits = get_loop_exit_edges (loop); + vec<edge> exits = get_loop_exit_edges (loop); edge exit; - gimple call; + gcall *call; gimple_stmt_iterator bsi; unsigned i; - FOR_EACH_VEC_ELT (edge, exits, i, exit) + FOR_EACH_VEC_ELT (exits, i, exit) { call = gimple_build_call (FENCE_FOLLOWING_MOVNT, 0); @@ -1208,10 +1279,9 @@ bsi = gsi_after_labels (exit->dest); gsi_insert_before (&bsi, call, GSI_NEW_STMT); - mark_virtual_ops_for_renaming (call); } - VEC_free (edge, heap, exits); + exits.release (); update_ssa (TODO_update_ssa_only_virtuals); } @@ -1229,16 +1299,16 @@ is a suitable place for it at each of the loop exits. */ if (FENCE_FOLLOWING_MOVNT != NULL_TREE) { - VEC (edge, heap) *exits = get_loop_exit_edges (loop); + vec<edge> exits = get_loop_exit_edges (loop); unsigned i; edge exit; - FOR_EACH_VEC_ELT (edge, exits, i, exit) + FOR_EACH_VEC_ELT (exits, i, exit) if ((exit->flags & EDGE_ABNORMAL) - && exit->dest == EXIT_BLOCK_PTR) + && exit->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) ret = false; - VEC_free (edge, heap, exits); + exits.release (); } return ret; @@ -1288,7 +1358,7 @@ /* Determine the coefficient by that unroll LOOP, from the information contained in the list of memory references REFS. Description of - umber of iterations of LOOP is stored to DESC. NINSNS is the number of + number of iterations of LOOP is stored to DESC. NINSNS is the number of insns of the LOOP. EST_NITER is the estimated number of iterations of the loop, or -1 if no estimate is available. */ @@ -1412,8 +1482,8 @@ if ((unsigned) loop_depth (aloop) <= min_depth) continue; - if (host_integerp (step, 0)) - astep = tree_low_cst (step, 0); + if (tree_fits_shwi_p (step)) + astep = tree_to_shwi (step); else astep = L1_CACHE_LINE_SIZE; @@ -1433,7 +1503,7 @@ { tree stride, access_fn; HOST_WIDE_INT *strides, astride; - VEC (tree, heap) *access_fns; + vec<tree> access_fns; tree ref = DR_REF (dr); unsigned i, ret = ~0u; @@ -1452,7 +1522,7 @@ strides = XCNEWVEC (HOST_WIDE_INT, n); access_fns = DR_ACCESS_FNS (dr); - FOR_EACH_VEC_ELT (tree, access_fns, i, access_fn) + FOR_EACH_VEC_ELT (access_fns, i, access_fn) { /* Keep track of the reference corresponding to the subscript, so that we know its stride. */ @@ -1462,8 +1532,8 @@ if (TREE_CODE (ref) == ARRAY_REF) { stride = TYPE_SIZE_UNIT (TREE_TYPE (ref)); - if (host_integerp (stride, 1)) - astride = tree_low_cst (stride, 1); + if (tree_fits_uhwi_p (stride)) + astride = tree_to_uhwi (stride); else astride = L1_CACHE_LINE_SIZE; @@ -1496,18 +1566,18 @@ /* Determines the distance till the first reuse of each reference in REFS in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other - memory references in the loop. */ + memory references in the loop. Return false if the analysis fails. */ -static void +static bool determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, bool no_other_refs) { struct loop *nest, *aloop; - VEC (data_reference_p, heap) *datarefs = NULL; - VEC (ddr_p, heap) *dependences = NULL; + vec<data_reference_p> datarefs = vNULL; + vec<ddr_p> dependences = vNULL; struct mem_ref_group *gr; struct mem_ref *ref, *refb; - VEC (loop_p, heap) *vloops = NULL; + auto_vec<loop_p> vloops; unsigned *loop_data_size; unsigned i, j, n; unsigned volume, dist, adist; @@ -1516,7 +1586,7 @@ ddr_p dep; if (loop->inner) - return; + return true; /* Find the outermost loop of the loop nest of loop (we require that there are no sibling loops inside the nest). */ @@ -1536,7 +1606,7 @@ We use this to estimate whether the reference is evicted from the cache before its reuse. */ find_loop_nest (nest, &vloops); - n = VEC_length (loop_p, vloops); + n = vloops.length (); loop_data_size = XNEWVEC (unsigned, n); volume = volume_of_references (refs); i = n; @@ -1548,9 +1618,9 @@ if (volume > L2_CACHE_SIZE_BYTES) continue; - aloop = VEC_index (loop_p, vloops, i); - vol = estimated_loop_iterations_int (aloop, false); - if (vol < 0) + aloop = vloops[i]; + vol = estimated_stmt_executions_int (aloop); + if (vol == -1) vol = expected_loop_iterations (aloop); volume *= vol; } @@ -1562,20 +1632,21 @@ for (gr = refs; gr; gr = gr->next) for (ref = gr->refs; ref; ref = ref->next) { - dr = create_data_ref (nest, loop_containing_stmt (ref->stmt), - ref->mem, ref->stmt, !ref->write_p); + dr = create_data_ref (loop_preheader_edge (nest), + loop_containing_stmt (ref->stmt), + ref->mem, ref->stmt, !ref->write_p, false); if (dr) { ref->reuse_distance = volume; dr->aux = ref; - VEC_safe_push (data_reference_p, heap, datarefs, dr); + datarefs.safe_push (dr); } else no_other_refs = false; } - FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) + FOR_EACH_VEC_ELT (datarefs, i, dr) { dist = self_reuse_distance (dr, loop_data_size, n, loop); ref = (struct mem_ref *) dr->aux; @@ -1586,9 +1657,10 @@ ref->independent_p = true; } - compute_all_dependences (datarefs, &dependences, vloops, true); + if (!compute_all_dependences (datarefs, &dependences, vloops, true)) + return false; - FOR_EACH_VEC_ELT (ddr_p, dependences, i, dep) + FOR_EACH_VEC_ELT (dependences, i, dep) { if (DDR_ARE_DEPENDENT (dep) == chrec_known) continue; @@ -1597,6 +1669,7 @@ refb = (struct mem_ref *) DDR_B (dep)->aux; if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know + || DDR_COULD_BE_INDEPENDENT_P (dep) || DDR_NUM_DIST_VECTS (dep) == 0) { /* If the dependence cannot be analyzed, assume that there might be @@ -1662,9 +1735,11 @@ fprintf (dump_file, "Reuse distances:\n"); for (gr = refs; gr; gr = gr->next) for (ref = gr->refs; ref; ref = ref->next) - fprintf (dump_file, " ref %p distance %u\n", - (void *) ref, ref->reuse_distance); + fprintf (dump_file, " reference %u:%u distance %u\n", + ref->group->uid, ref->uid, ref->reuse_distance); } + + return true; } /* Determine whether or not the trip count to ahead ratio is too small based @@ -1801,7 +1876,9 @@ return false; ahead = (PREFETCH_LATENCY + time - 1) / time; - est_niter = estimated_loop_iterations_int (loop, false); + est_niter = estimated_stmt_executions_int (loop); + if (est_niter == -1) + est_niter = likely_max_stmt_executions_int (loop); /* Prefetching is not likely to be profitable if the trip count to ahead ratio is too small. */ @@ -1825,7 +1902,8 @@ if (nothing_to_prefetch_p (refs)) goto fail; - determine_loop_nest_reuse (loop, refs, no_other_refs); + if (!determine_loop_nest_reuse (loop, refs, no_other_refs)) + goto fail; /* Step 3: determine unroll factor. */ unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc, @@ -1877,16 +1955,15 @@ unsigned int tree_ssa_prefetch_arrays (void) { - loop_iterator li; struct loop *loop; bool unrolled = false; int todo_flags = 0; - if (!HAVE_prefetch + if (!targetm.have_prefetch () /* It is possible to ask compiler for say -mtune=i486 -march=pentium4. -mtune=i486 causes us having PREFETCH_BLOCK 0, since this is part of processor costs and i486 does not have prefetch, but - -march=pentium4 causes HAVE_prefetch to be true. Ugh. */ + -march=pentium4 causes targetm.have_prefetch to be true. Ugh. */ || PREFETCH_BLOCK == 0) return 0; @@ -1910,7 +1987,7 @@ initialize_original_copy_tables (); - if (!built_in_decls[BUILT_IN_PREFETCH]) + if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH)) { tree type = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); @@ -1918,14 +1995,10 @@ BUILT_IN_PREFETCH, BUILT_IN_NORMAL, NULL, NULL_TREE); DECL_IS_NOVOPS (decl) = true; - built_in_decls[BUILT_IN_PREFETCH] = decl; + set_builtin_decl (BUILT_IN_PREFETCH, decl, false); } - /* We assume that size of cache line is a power of two, so verify this - here. */ - gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0); - - FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing loop %d:\n", loop->num); @@ -1945,3 +2018,66 @@ free_original_copy_tables (); return todo_flags; } + +/* Prefetching. */ + +namespace { + +const pass_data pass_data_loop_prefetch = +{ + GIMPLE_PASS, /* type */ + "aprefetch", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_TREE_PREFETCH, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_loop_prefetch : public gimple_opt_pass +{ +public: + pass_loop_prefetch (gcc::context *ctxt) + : gimple_opt_pass (pass_data_loop_prefetch, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return flag_prefetch_loop_arrays > 0; } + virtual unsigned int execute (function *); + +}; // class pass_loop_prefetch + +unsigned int +pass_loop_prefetch::execute (function *fun) +{ + if (number_of_loops (fun) <= 1) + return 0; + + if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0) + { + static bool warned = false; + + if (!warned) + { + warning (OPT_Wdisabled_optimization, + "%<l1-cache-size%> parameter is not a power of two %d", + PREFETCH_BLOCK); + warned = true; + } + return 0; + } + + return tree_ssa_prefetch_arrays (); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_loop_prefetch (gcc::context *ctxt) +{ + return new pass_loop_prefetch (ctxt); +} + +