diff gcc/tree-ssa-loop-prefetch.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
--- a/gcc/tree-ssa-loop-prefetch.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/tree-ssa-loop-prefetch.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,5 +1,5 @@
 /* Array prefetching.
-   Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2005-2017 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -20,33 +20,35 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
 #include "tree.h"
-#include "tm_p.h"
-#include "basic-block.h"
-#include "output.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "optabs-query.h"
 #include "tree-pretty-print.h"
-#include "tree-flow.h"
-#include "tree-dump.h"
-#include "timevar.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
 #include "cfgloop.h"
-#include "tree-pass.h"
-#include "insn-config.h"
-#include "recog.h"
-#include "hashtab.h"
-#include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
-#include "diagnostic-core.h"
 #include "params.h"
 #include "langhooks.h"
 #include "tree-inline.h"
 #include "tree-data-ref.h"
-
-
-/* FIXME: Needed for optabs, but this should all be moved to a TBD interface
-   between the GIMPLE and RTL worlds.  */
-#include "expr.h"
-#include "optabs.h"
+#include "diagnostic-core.h"
+#include "dbgcnt.h"
 
 /* This pass inserts prefetch instructions to optimize cache usage during
    accesses to arrays in loops.  It processes loops sequentially and:
@@ -189,10 +191,6 @@
 #define ACCEPTABLE_MISS_RATE 50
 #endif
 
-#ifndef HAVE_prefetch
-#define HAVE_prefetch 0
-#endif
-
 #define L1_CACHE_SIZE_BYTES ((unsigned) (L1_CACHE_SIZE * 1024))
 #define L2_CACHE_SIZE_BYTES ((unsigned) (L2_CACHE_SIZE * 1024))
 
@@ -230,11 +228,12 @@
   tree step;			/* Step of the reference.  */
   struct mem_ref *refs;		/* References in the group.  */
   struct mem_ref_group *next;	/* Next group of references.  */
+  unsigned int uid;		/* Group UID, used only for debugging.  */
 };
 
 /* Assigned to PREFETCH_BEFORE when all iterations are to be prefetched.  */
 
-#define PREFETCH_ALL		(~(unsigned HOST_WIDE_INT) 0)
+#define PREFETCH_ALL		HOST_WIDE_INT_M1U
 
 /* Do not generate a prefetch if the unroll factor is significantly less
    than what is required by the prefetch.  This is to avoid redundant
@@ -259,7 +258,7 @@
 
 struct mem_ref
 {
-  gimple stmt;			/* Statement in that the reference appears.  */
+  gimple *stmt;			/* Statement in that the reference appears.  */
   tree mem;			/* The reference.  */
   HOST_WIDE_INT delta;		/* Constant offset of the reference.  */
   struct mem_ref_group *group;	/* The group of references it belongs to.  */
@@ -272,6 +271,7 @@
   unsigned reuse_distance;	/* The amount of data accessed before the first
 				   reuse of this value.  */
   struct mem_ref *next;		/* The next reference in the group.  */
+  unsigned int uid;		/* Ref UID, used only for debugging.  */
   unsigned write_p : 1;		/* Is it a write?  */
   unsigned independent_p : 1;	/* True if the reference is independent on
 				   all other references inside the loop.  */
@@ -280,29 +280,31 @@
 				   nontemporal one.  */
 };
 
+/* Dumps information about a memory reference.  */
+static void
+dump_mem_details (FILE *file, tree base, tree step,
+                  HOST_WIDE_INT delta, bool write_p)
+{
+  fprintf (file, "(base ");
+  print_generic_expr (file, base, TDF_SLIM);
+  fprintf (file, ", step ");
+  if (cst_and_fits_in_hwi (step))
+    fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (step));
+  else
+    print_generic_expr (file, step, TDF_SLIM);
+  fprintf (file, ")\n");
+  fprintf (file, "  delta " HOST_WIDE_INT_PRINT_DEC "\n", delta);
+  fprintf (file, "  %s\n\n", write_p ? "write" : "read");
+}
+
 /* Dumps information about reference REF to FILE.  */
 
 static void
 dump_mem_ref (FILE *file, struct mem_ref *ref)
 {
-  fprintf (file, "Reference %p:\n", (void *) ref);
-
-  fprintf (file, "  group %p (base ", (void *) ref->group);
-  print_generic_expr (file, ref->group->base, TDF_SLIM);
-  fprintf (file, ", step ");
-  if (cst_and_fits_in_hwi (ref->group->step))
-    fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (ref->group->step));
-  else
-    print_generic_expr (file, ref->group->step, TDF_TREE);
+  fprintf (file, "reference %u:%u (", ref->group->uid, ref->uid);
+  print_generic_expr (file, ref->mem, TDF_SLIM);
   fprintf (file, ")\n");
-
-  fprintf (file, "  delta ");
-  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->delta);
-  fprintf (file, "\n");
-
-  fprintf (file, "  %s\n", ref->write_p ? "write" : "read");
-
-  fprintf (file, "\n");
 }
 
 /* Finds a group with BASE and STEP in GROUPS, or creates one if it does not
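[Note: the dump format above replaces raw "%p" pointer values with stable group-uid:ref-uid pairs, so dump files stay comparable across runs instead of varying with allocator state and ASLR.  A minimal stand-alone sketch of the numbering pattern, with hypothetical names and not part of this patch:]

#include <stdlib.h>

/* Each object is numbered from a static counter at creation time, so
   dumps can print "1", "2", ... instead of addresses.  */
struct group
{
  unsigned int uid;
  struct group *next;
};

static struct group *
new_group (struct group **head)
{
  static unsigned int last_uid;	/* Persists across calls; first uid is 1.  */
  struct group *g = (struct group *) calloc (1, sizeof (struct group));
  g->uid = ++last_uid;
  g->next = *head;
  *head = g;
  return g;
}
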
@@ -311,6 +313,9 @@
 static struct mem_ref_group *
 find_or_create_group (struct mem_ref_group **groups, tree base, tree step)
 {
+  /* Global count for setting struct mem_ref_group->uid.  */
+  static unsigned int last_mem_ref_group_uid = 0;
+
   struct mem_ref_group *group;
 
   for (; *groups; groups = &(*groups)->next)
@@ -321,8 +326,8 @@
 
       /* If step is an integer constant, keep the list of groups sorted
          by decreasing step.  */
-        if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step)
-            && int_cst_value ((*groups)->step) < int_cst_value (step))
+      if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step)
+	  && int_cst_value ((*groups)->step) < int_cst_value (step))
 	break;
     }
 
@@ -330,6 +335,7 @@
   group->base = base;
   group->step = step;
   group->refs = NULL;
+  group->uid = ++last_mem_ref_group_uid;
   group->next = *groups;
   *groups = group;
 
@@ -340,14 +346,17 @@
    WRITE_P.  The reference occurs in statement STMT.  */
 
 static void
-record_ref (struct mem_ref_group *group, gimple stmt, tree mem,
+record_ref (struct mem_ref_group *group, gimple *stmt, tree mem,
 	    HOST_WIDE_INT delta, bool write_p)
 {
+  unsigned int last_mem_ref_uid = 0;
   struct mem_ref **aref;
 
   /* Do not record the same address twice.  */
   for (aref = &group->refs; *aref; aref = &(*aref)->next)
     {
+      last_mem_ref_uid = (*aref)->uid;
+
       /* It does not have to be possible for write reference to reuse the read
 	 prefetch, or vice versa.  */
       if (!WRITE_CAN_USE_READ_PREFETCH
@@ -376,9 +385,16 @@
   (*aref)->next = NULL;
   (*aref)->independent_p = false;
   (*aref)->storent_p = false;
+  (*aref)->uid = last_mem_ref_uid + 1;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_mem_ref (dump_file, *aref);
+    {
+      dump_mem_ref (dump_file, *aref);
+
+      fprintf (dump_file, "  group %u ", group->uid);
+      dump_mem_details (dump_file, group->base, group->step, delta,
+			write_p);
+    }
 }
 
 /* Release memory references in GROUPS.  */
@@ -406,7 +422,7 @@
 struct ar_data
 {
   struct loop *loop;			/* Loop of the reference.  */
-  gimple stmt;				/* Statement of the reference.  */
+  gimple *stmt;				/* Statement of the reference.  */
   tree *step;				/* Step of the memory reference.  */
   HOST_WIDE_INT *delta;			/* Offset of the memory reference.  */
 };
@@ -472,7 +488,7 @@
 static bool
 analyze_ref (struct loop *loop, tree *ref_p, tree *base,
 	     tree *step, HOST_WIDE_INT *delta,
-	     gimple stmt)
+	     gimple *stmt)
 {
   struct ar_data ar_data;
   tree off;
@@ -520,7 +536,7 @@
 
 static bool
 gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
-			      tree ref, bool write_p, gimple stmt)
+			      tree ref, bool write_p, gimple *stmt)
 {
   tree base, step;
   HOST_WIDE_INT delta;
@@ -539,9 +555,44 @@
   if (may_be_nonaddressable_p (base))
     return false;
 
-  /* Limit non-constant step prefetching only to the innermost loops.  */
-  if (!cst_and_fits_in_hwi (step) && loop->inner != NULL)
-    return false;
+  /* Limit non-constant step prefetching only to the innermost loops and
+     only when the step is loop invariant in the entire loop nest.  */
+  if (!cst_and_fits_in_hwi (step))
+    {
+      if (loop->inner != NULL)
+        {
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file, "Memory expression %p\n", (void *) ref);
+              print_generic_expr (dump_file, ref, TDF_SLIM);
+              fprintf (dump_file, ":");
+              dump_mem_details (dump_file, base, step, delta, write_p);
+              fprintf (dump_file,
+                       "Ignoring %p, non-constant step prefetching is "
+                       "limited to innermost loops\n",
+                       (void *) ref);
+            }
+          return false;
+        }
+      else
+        {
+          if (!expr_invariant_in_loop_p (loop_outermost (loop), step))
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                {
+                  fprintf (dump_file, "Memory expression %p\n", (void *) ref);
+                  print_generic_expr (dump_file, ref, TDF_SLIM);
+                  fprintf (dump_file, ":");
+                  dump_mem_details (dump_file, base, step, delta, write_p);
+                  fprintf (dump_file,
+                           "Not prefetching, ignoring %p due to "
+                           "loop variant step\n",
+                           (void *) ref);
+                }
+              return false;
+            }
+        }
+    }
 
   /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
      are integer constants.  */
@@ -561,7 +612,7 @@
   basic_block bb;
   unsigned i;
   gimple_stmt_iterator bsi;
-  gimple stmt;
+  gimple *stmt;
   tree lhs, rhs;
   struct mem_ref_group *refs = NULL;
 
@@ -589,6 +640,9 @@
 	      continue;
 	    }
 
+	  if (! gimple_vuse (stmt))
+	    continue;
+
 	  lhs = gimple_assign_lhs (stmt);
 	  rhs = gimple_assign_rhs1 (stmt);
 
@@ -658,9 +712,9 @@
   gcc_assert (by > 0);
 
   if (x >= 0)
-    return x / by;
+    return x / (HOST_WIDE_INT) by;
   else
-    return (x + by - 1) / by;
+    return (x + (HOST_WIDE_INT) by - 1) / (HOST_WIDE_INT) by;
 }
 
 /* Given a CACHE_LINE_SIZE and two inductive memory references
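[Note: the casts added to ddown above matter because BY is unsigned.  In C, dividing a signed value by an unsigned one of the same rank first converts the signed operand to unsigned, which mangles negative X.  A small stand-alone illustration, not GCC code:]

#include <stdio.h>

int
main (void)
{
  long long x = -8;
  unsigned long long by = 4;

  /* Mixed signedness: x is converted to unsigned long long, so the
     quotient is a huge positive number rather than -2.  */
  printf ("%llu\n", x / by);

  /* Casting by back to a signed type keeps the division signed.  */
  printf ("%lld\n", x / (long long) by);
  return 0;
}
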
@@ -794,7 +848,7 @@
       prefetch_before = (hit_from - delta_r + step - 1) / step;
 
       /* Do not reduce prefetch_before if we meet beyond cache size.  */
-      if (prefetch_before > (unsigned) abs (L2_CACHE_SIZE_BYTES / step))
+      if (prefetch_before > absu_hwi (L2_CACHE_SIZE_BYTES / step))
         prefetch_before = PREFETCH_ALL;
       if (prefetch_before < ref->prefetch_before)
 	ref->prefetch_before = prefetch_before;
@@ -895,7 +949,7 @@
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  fprintf (dump_file, "Reference %p:", (void *) ref_pruned);
+	  dump_mem_ref (dump_file, ref_pruned);
 
 	  if (ref_pruned->prefetch_before == PREFETCH_ALL
 	      && ref_pruned->prefetch_mod == 1)
@@ -943,8 +997,8 @@
   if (ref->prefetch_before != PREFETCH_ALL)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
-        fprintf (dump_file, "Ignoring %p due to prefetch_before\n",
-		 (void *) ref);
+        fprintf (dump_file, "Ignoring reference %u:%u due to prefetch_before\n",
+		 ref->group->uid, ref->uid);
       return false;
     }
 
@@ -952,7 +1006,8 @@
   if (ref->storent_p)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
-        fprintf (dump_file, "Ignoring nontemporal store %p\n", (void *) ref);
+        fprintf (dump_file, "Ignoring nontemporal store reference %u:%u\n",
+                 ref->group->uid, ref->uid);
       return false;
     }
 
@@ -1015,7 +1069,14 @@
 	if (2 * remaining_prefetch_slots < prefetch_slots)
 	  continue;
 
+	/* Stop prefetching if debug counter is activated.  */
+	if (!dbg_cnt (prefetch))
+	  continue;
+
 	ref->issue_prefetch_p = true;
+	if (dump_file && (dump_flags & TDF_DETAILS))
+	  fprintf (dump_file, "Decided to issue prefetch for reference %u:%u\n",
+		   ref->group->uid, ref->uid);
 
 	if (remaining_prefetch_slots <= prefetch_slots)
 	  return true;
@@ -1073,15 +1134,15 @@
 {
   HOST_WIDE_INT delta;
   tree addr, addr_base, write_p, local, forward;
-  gimple prefetch;
+  gcall *prefetch;
   gimple_stmt_iterator bsi;
   unsigned n_prefetches, ap;
   bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Issued%s prefetch for %p.\n",
+    fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
 	     nontemporal ? " nontemporal" : "",
-	     (void *) ref);
+	     ref->group->uid, ref->uid);
 
   bsi = gsi_for_stmt (ref->stmt);
 
@@ -1100,10 +1161,9 @@
           /* Determine the address to prefetch.  */
           delta = (ahead + ap * ref->prefetch_mod) *
 		   int_cst_value (ref->group->step);
-          addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node,
-                              addr_base, size_int (delta));
-          addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL,
-                                           true, GSI_SAME_STMT);
+          addr = fold_build_pointer_plus_hwi (addr_base, delta);
+          addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+					   NULL, true, GSI_SAME_STMT);
         }
       else
         {
@@ -1112,13 +1172,24 @@
           forward = fold_build2 (MULT_EXPR, sizetype,
                                  fold_convert (sizetype, ref->group->step),
                                  fold_convert (sizetype, size_int (ahead)));
-          addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, addr_base,
-			      forward);
+          addr = fold_build_pointer_plus (addr_base, forward);
           addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
 					   NULL, true, GSI_SAME_STMT);
       }
+
+      if (addr_base != addr
+	  && TREE_CODE (addr_base) == SSA_NAME
+	  && TREE_CODE (addr) == SSA_NAME)
+	{
+	  duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base));
+	  /* As this isn't a plain copy we have to reset alignment
+	     information.  */
+	  if (SSA_NAME_PTR_INFO (addr))
+	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr));
+	}
+
       /* Create the prefetch instruction.  */
-      prefetch = gimple_build_call (built_in_decls[BUILT_IN_PREFETCH],
+      prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
 				    3, addr, write_p, local);
       gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT);
     }
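[Note: at source level the three-operand gimple call built here corresponds to GCC's __builtin_prefetch (addr, rw, locality), where rw is 0 for a read and 1 for a write, and locality runs from 0 (no expected reuse, the nontemporal case) to 3 (high temporal locality).  An illustrative loop with a made-up lookahead distance; the pass computes the real distance from prefetch latency and loop timing:]

void
scale (const double *a, double *out, int n)
{
  for (int i = 0; i < n; i++)
    {
      /* Prefetch a hypothetical 64 iterations ahead of the use.  */
      if (i + 64 < n)
        __builtin_prefetch (&a[i + 64], 0, 3);	/* read, keep in cache */
      out[i] = a[i] * 2.0;
    }
}
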
@@ -1146,7 +1217,7 @@
 static bool
 nontemporal_store_p (struct mem_ref *ref)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   enum insn_code code;
 
   /* REF must be a write that is not reused.  We require it to be independent
@@ -1176,8 +1247,8 @@
     return false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
-	     (void *) ref);
+    fprintf (dump_file, "Marked reference %u:%u as a nontemporal store.\n",
+	     ref->group->uid, ref->uid);
 
   gimple_assign_set_nontemporal_move (ref->stmt, true);
   ref->storent_p = true;
@@ -1190,13 +1261,13 @@
 static void
 emit_mfence_after_loop (struct loop *loop)
 {
-  VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+  vec<edge> exits = get_loop_exit_edges (loop);
   edge exit;
-  gimple call;
+  gcall *call;
   gimple_stmt_iterator bsi;
   unsigned i;
 
-  FOR_EACH_VEC_ELT (edge, exits, i, exit)
+  FOR_EACH_VEC_ELT (exits, i, exit)
     {
       call = gimple_build_call (FENCE_FOLLOWING_MOVNT, 0);
 
@@ -1208,10 +1279,9 @@
       bsi = gsi_after_labels (exit->dest);
 
       gsi_insert_before (&bsi, call, GSI_NEW_STMT);
-      mark_virtual_ops_for_renaming (call);
     }
 
-  VEC_free (edge, heap, exits);
+  exits.release ();
   update_ssa (TODO_update_ssa_only_virtuals);
 }
 
@@ -1229,16 +1299,16 @@
      is a suitable place for it at each of the loop exits.  */
   if (FENCE_FOLLOWING_MOVNT != NULL_TREE)
     {
-      VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+      vec<edge> exits = get_loop_exit_edges (loop);
       unsigned i;
       edge exit;
 
-      FOR_EACH_VEC_ELT (edge, exits, i, exit)
+      FOR_EACH_VEC_ELT (exits, i, exit)
 	if ((exit->flags & EDGE_ABNORMAL)
-	    && exit->dest == EXIT_BLOCK_PTR)
+	    && exit->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 	  ret = false;
 
-      VEC_free (edge, heap, exits);
+      exits.release ();
     }
 
   return ret;
@@ -1288,7 +1358,7 @@
 
 /* Determine the coefficient by that unroll LOOP, from the information
    contained in the list of memory references REFS.  Description of
-   umber of iterations of LOOP is stored to DESC.  NINSNS is the number of
+   number of iterations of LOOP is stored to DESC.  NINSNS is the number of
    insns of the LOOP.  EST_NITER is the estimated number of iterations of
    the loop, or -1 if no estimate is available.  */
 
@@ -1412,8 +1482,8 @@
       if ((unsigned) loop_depth (aloop) <= min_depth)
 	continue;
 
-      if (host_integerp (step, 0))
-	astep = tree_low_cst (step, 0);
+      if (tree_fits_shwi_p (step))
+	astep = tree_to_shwi (step);
       else
 	astep = L1_CACHE_LINE_SIZE;
 
@@ -1433,7 +1503,7 @@
 {
   tree stride, access_fn;
   HOST_WIDE_INT *strides, astride;
-  VEC (tree, heap) *access_fns;
+  vec<tree> access_fns;
   tree ref = DR_REF (dr);
   unsigned i, ret = ~0u;
 
@@ -1452,7 +1522,7 @@
   strides = XCNEWVEC (HOST_WIDE_INT, n);
   access_fns = DR_ACCESS_FNS (dr);
 
-  FOR_EACH_VEC_ELT (tree, access_fns, i, access_fn)
+  FOR_EACH_VEC_ELT (access_fns, i, access_fn)
     {
       /* Keep track of the reference corresponding to the subscript, so that we
 	 know its stride.  */
@@ -1462,8 +1532,8 @@
       if (TREE_CODE (ref) == ARRAY_REF)
 	{
 	  stride = TYPE_SIZE_UNIT (TREE_TYPE (ref));
-	  if (host_integerp (stride, 1))
-	    astride = tree_low_cst (stride, 1);
+	  if (tree_fits_uhwi_p (stride))
+	    astride = tree_to_uhwi (stride);
 	  else
 	    astride = L1_CACHE_LINE_SIZE;
 
@@ -1496,18 +1566,18 @@
 
 /* Determines the distance till the first reuse of each reference in REFS
    in the loop nest of LOOP.  NO_OTHER_REFS is true if there are no other
-   memory references in the loop.  */
+   memory references in the loop.  Return false if the analysis fails.  */
 
-static void
+static bool
 determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
 			   bool no_other_refs)
 {
   struct loop *nest, *aloop;
-  VEC (data_reference_p, heap) *datarefs = NULL;
-  VEC (ddr_p, heap) *dependences = NULL;
+  vec<data_reference_p> datarefs = vNULL;
+  vec<ddr_p> dependences = vNULL;
   struct mem_ref_group *gr;
   struct mem_ref *ref, *refb;
-  VEC (loop_p, heap) *vloops = NULL;
+  auto_vec<loop_p> vloops;
   unsigned *loop_data_size;
   unsigned i, j, n;
   unsigned volume, dist, adist;
@@ -1516,7 +1586,7 @@
   ddr_p dep;
 
   if (loop->inner)
-    return;
+    return true;
 
   /* Find the outermost loop of the loop nest of loop (we require that
      there are no sibling loops inside the nest).  */
@@ -1536,7 +1606,7 @@
      We use this to estimate whether the reference is evicted from the
      cache before its reuse.  */
   find_loop_nest (nest, &vloops);
-  n = VEC_length (loop_p, vloops);
+  n = vloops.length ();
   loop_data_size = XNEWVEC (unsigned, n);
   volume = volume_of_references (refs);
   i = n;
@@ -1548,9 +1618,9 @@
       if (volume > L2_CACHE_SIZE_BYTES)
 	continue;
 
-      aloop = VEC_index (loop_p, vloops, i);
-      vol = estimated_loop_iterations_int (aloop, false);
-      if (vol < 0)
+      aloop = vloops[i];
+      vol = estimated_stmt_executions_int (aloop);
+      if (vol == -1)
 	vol = expected_loop_iterations (aloop);
       volume *= vol;
     }
@@ -1562,20 +1632,21 @@
   for (gr = refs; gr; gr = gr->next)
     for (ref = gr->refs; ref; ref = ref->next)
       {
-	dr = create_data_ref (nest, loop_containing_stmt (ref->stmt),
-			      ref->mem, ref->stmt, !ref->write_p);
+	dr = create_data_ref (loop_preheader_edge (nest),
+			      loop_containing_stmt (ref->stmt),
+			      ref->mem, ref->stmt, !ref->write_p, false);
 
 	if (dr)
 	  {
 	    ref->reuse_distance = volume;
 	    dr->aux = ref;
-	    VEC_safe_push (data_reference_p, heap, datarefs, dr);
+	    datarefs.safe_push (dr);
 	  }
 	else
 	  no_other_refs = false;
       }
 
-  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       dist = self_reuse_distance (dr, loop_data_size, n, loop);
       ref = (struct mem_ref *) dr->aux;
@@ -1586,9 +1657,10 @@
 	ref->independent_p = true;
     }
 
-  compute_all_dependences (datarefs, &dependences, vloops, true);
+  if (!compute_all_dependences (datarefs, &dependences, vloops, true))
+    return false;
 
-  FOR_EACH_VEC_ELT (ddr_p, dependences, i, dep)
+  FOR_EACH_VEC_ELT (dependences, i, dep)
     {
       if (DDR_ARE_DEPENDENT (dep) == chrec_known)
 	continue;
@@ -1597,6 +1669,7 @@
       refb = (struct mem_ref *) DDR_B (dep)->aux;
 
       if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
+	  || DDR_COULD_BE_INDEPENDENT_P (dep)
 	  || DDR_NUM_DIST_VECTS (dep) == 0)
 	{
 	  /* If the dependence cannot be analyzed, assume that there might be
@@ -1662,9 +1735,11 @@
       fprintf (dump_file, "Reuse distances:\n");
       for (gr = refs; gr; gr = gr->next)
 	for (ref = gr->refs; ref; ref = ref->next)
-	  fprintf (dump_file, " ref %p distance %u\n",
-		   (void *) ref, ref->reuse_distance);
+	  fprintf (dump_file, " reference %u:%u distance %u\n",
+		   ref->group->uid, ref->uid, ref->reuse_distance);
     }
+
+  return true;
 }
 
 /* Determine whether or not the trip count to ahead ratio is too small based
@@ -1801,7 +1876,9 @@
     return false;
 
   ahead = (PREFETCH_LATENCY + time - 1) / time;
-  est_niter = estimated_loop_iterations_int (loop, false);
+  est_niter = estimated_stmt_executions_int (loop);
+  if (est_niter == -1)
+    est_niter = likely_max_stmt_executions_int (loop);
 
   /* Prefetching is not likely to be profitable if the trip count to ahead
      ratio is too small.  */
@@ -1825,7 +1902,8 @@
   if (nothing_to_prefetch_p (refs))
     goto fail;
 
-  determine_loop_nest_reuse (loop, refs, no_other_refs);
+  if (!determine_loop_nest_reuse (loop, refs, no_other_refs))
+    goto fail;
 
   /* Step 3: determine unroll factor.  */
   unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
@@ -1877,16 +1955,15 @@
 unsigned int
 tree_ssa_prefetch_arrays (void)
 {
-  loop_iterator li;
   struct loop *loop;
   bool unrolled = false;
   int todo_flags = 0;
 
-  if (!HAVE_prefetch
+  if (!targetm.have_prefetch ()
       /* It is possible to ask compiler for say -mtune=i486 -march=pentium4.
 	 -mtune=i486 causes us having PREFETCH_BLOCK 0, since this is part
 	 of processor costs and i486 does not have prefetch, but
-	 -march=pentium4 causes HAVE_prefetch to be true.  Ugh.  */
+	 -march=pentium4 causes targetm.have_prefetch to be true.  Ugh.  */
       || PREFETCH_BLOCK == 0)
     return 0;
 
@@ -1910,7 +1987,7 @@
 
   initialize_original_copy_tables ();
 
-  if (!built_in_decls[BUILT_IN_PREFETCH])
+  if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH))
     {
       tree type = build_function_type_list (void_type_node,
 					    const_ptr_type_node, NULL_TREE);
@@ -1918,14 +1995,10 @@
 					BUILT_IN_PREFETCH, BUILT_IN_NORMAL,
 					NULL, NULL_TREE);
       DECL_IS_NOVOPS (decl) = true;
-      built_in_decls[BUILT_IN_PREFETCH] = decl;
+      set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
     }
 
-  /* We assume that size of cache line is a power of two, so verify this
-     here.  */
-  gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0);
-
-  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Processing loop %d:\n", loop->num);
@@ -1945,3 +2018,66 @@
   free_original_copy_tables ();
   return todo_flags;
 }
+
+/* Prefetching.  */
+
+namespace {
+
+const pass_data pass_data_loop_prefetch =
+{
+  GIMPLE_PASS, /* type */
+  "aprefetch", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_PREFETCH, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_loop_prefetch : public gimple_opt_pass
+{
+public:
+  pass_loop_prefetch (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_loop_prefetch, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_prefetch_loop_arrays > 0; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_loop_prefetch
+
+unsigned int
+pass_loop_prefetch::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0)
+    {
+      static bool warned = false;
+
+      if (!warned)
+	{
+	  warning (OPT_Wdisabled_optimization,
+		   "%<l1-cache-size%> parameter is not a power of two %d",
+		   PREFETCH_BLOCK);
+	  warned = true;
+	}
+      return 0;
+    }
+
+  return tree_ssa_prefetch_arrays ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_loop_prefetch (gcc::context *ctxt)
+{
+  return new pass_loop_prefetch (ctxt);
+}
+
+
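[Note: for orientation, make_pass_loop_prefetch above is the factory the pass manager invokes; in GCC of this era the pass is scheduled by an entry in gcc/passes.def, which is not part of this patch:]

NEXT_PASS (pass_loop_prefetch);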