Mercurial > hg > CbC > CbC_gcc
diff gcc/tree-vectorizer.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/tree-vectorizer.c Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/tree-vectorizer.c Fri Oct 27 22:46:09 2017 +0900 @@ -1,6 +1,5 @@ /* Vectorizer - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. + Copyright (C) 2003-2017 Free Software Foundation, Inc. Contributed by Dorit Naishlos <dorit@il.ibm.com> This file is part of GCC. @@ -58,109 +57,535 @@ #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" -#include "ggc.h" +#include "backend.h" #include "tree.h" -#include "tree-pretty-print.h" -#include "tree-flow.h" -#include "tree-dump.h" +#include "gimple.h" +#include "predict.h" +#include "tree-pass.h" +#include "ssa.h" +#include "cgraph.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "tree-ssa-loop-manip.h" +#include "tree-ssa-loop-niter.h" +#include "tree-cfg.h" #include "cfgloop.h" -#include "cfglayout.h" #include "tree-vectorizer.h" -#include "tree-pass.h" -#include "timevar.h" +#include "tree-ssa-propagate.h" +#include "dbgcnt.h" +#include "tree-scalar-evolution.h" +#include "stringpool.h" +#include "attribs.h" -/* vect_dump will be set to stderr or dump_file if exist. */ -FILE *vect_dump; - -/* vect_verbosity_level set to an invalid value - to mark that it's uninitialized. */ -static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; /* Loop or bb location. */ -LOC vect_location; +source_location vect_location; /* Vector mapping GIMPLE stmt to stmt_vec_info. */ -VEC(vec_void_p,heap) *stmt_vec_info_vec; - +vec<stmt_vec_info> stmt_vec_info_vec; +/* For mapping simduid to vectorization factor. */ -/* Function vect_set_dump_settings. +struct simduid_to_vf : free_ptr_hash<simduid_to_vf> +{ + unsigned int simduid; + int vf; + + /* hash_table support. */ + static inline hashval_t hash (const simduid_to_vf *); + static inline int equal (const simduid_to_vf *, const simduid_to_vf *); +}; + +inline hashval_t +simduid_to_vf::hash (const simduid_to_vf *p) +{ + return p->simduid; +} + +inline int +simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2) +{ + return p1->simduid == p2->simduid; +} + +/* This hash maps the OMP simd array to the corresponding simduid used + to index into it. Like thus, - Fix the verbosity level of the vectorizer if the - requested level was not set explicitly using the flag - -ftree-vectorizer-verbose=N. - Decide where to print the debugging information (dump_file/stderr). - If the user defined the verbosity level, but there is no dump file, - print to stderr, otherwise print to the dump file. */ + _7 = GOMP_SIMD_LANE (simduid.0) + ... + ... + D.1737[_7] = stuff; + + + This hash maps from the OMP simd array (D.1737[]) to DECL_UID of + simduid.0. */ + +struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid> +{ + tree decl; + unsigned int simduid; + + /* hash_table support. */ + static inline hashval_t hash (const simd_array_to_simduid *); + static inline int equal (const simd_array_to_simduid *, + const simd_array_to_simduid *); +}; + +inline hashval_t +simd_array_to_simduid::hash (const simd_array_to_simduid *p) +{ + return DECL_UID (p->decl); +} + +inline int +simd_array_to_simduid::equal (const simd_array_to_simduid *p1, + const simd_array_to_simduid *p2) +{ + return p1->decl == p2->decl; +} + +/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE, + into their corresponding constants and remove + IFN_GOMP_SIMD_ORDERED_{START,END}. */ static void -vect_set_dump_settings (bool slp) +adjust_simduid_builtins (hash_table<simduid_to_vf> *htab) { - vect_dump = dump_file; + basic_block bb; - /* Check if the verbosity level was defined by the user: */ - if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL) + FOR_EACH_BB_FN (bb, cfun) { - vect_verbosity_level = user_vect_verbosity_level; - /* Ignore user defined verbosity if dump flags require higher level of - verbosity. */ - if (dump_file) - { - if (((dump_flags & TDF_DETAILS) - && vect_verbosity_level >= REPORT_DETAILS) - || ((dump_flags & TDF_STATS) - && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS)) - return; - } - else - { - /* If there is no dump file, print to stderr in case of loop - vectorization. */ - if (!slp) - vect_dump = stderr; + gimple_stmt_iterator i; - return; - } + for (i = gsi_start_bb (bb); !gsi_end_p (i); ) + { + unsigned int vf = 1; + enum internal_fn ifn; + gimple *stmt = gsi_stmt (i); + tree t; + if (!is_gimple_call (stmt) + || !gimple_call_internal_p (stmt)) + { + gsi_next (&i); + continue; + } + ifn = gimple_call_internal_fn (stmt); + switch (ifn) + { + case IFN_GOMP_SIMD_LANE: + case IFN_GOMP_SIMD_VF: + case IFN_GOMP_SIMD_LAST_LANE: + break; + case IFN_GOMP_SIMD_ORDERED_START: + case IFN_GOMP_SIMD_ORDERED_END: + if (integer_onep (gimple_call_arg (stmt, 0))) + { + enum built_in_function bcode + = (ifn == IFN_GOMP_SIMD_ORDERED_START + ? BUILT_IN_GOMP_ORDERED_START + : BUILT_IN_GOMP_ORDERED_END); + gimple *g + = gimple_build_call (builtin_decl_explicit (bcode), 0); + tree vdef = gimple_vdef (stmt); + gimple_set_vdef (g, vdef); + SSA_NAME_DEF_STMT (vdef) = g; + gimple_set_vuse (g, gimple_vuse (stmt)); + gsi_replace (&i, g, true); + continue; + } + gsi_remove (&i, true); + unlink_stmt_vdef (stmt); + continue; + default: + gsi_next (&i); + continue; + } + tree arg = gimple_call_arg (stmt, 0); + gcc_assert (arg != NULL_TREE); + gcc_assert (TREE_CODE (arg) == SSA_NAME); + simduid_to_vf *p = NULL, data; + data.simduid = DECL_UID (SSA_NAME_VAR (arg)); + /* Need to nullify loop safelen field since it's value is not + valid after transformation. */ + if (bb->loop_father && bb->loop_father->safelen > 0) + bb->loop_father->safelen = 0; + if (htab) + { + p = htab->find (&data); + if (p) + vf = p->vf; + } + switch (ifn) + { + case IFN_GOMP_SIMD_VF: + t = build_int_cst (unsigned_type_node, vf); + break; + case IFN_GOMP_SIMD_LANE: + t = build_int_cst (unsigned_type_node, 0); + break; + case IFN_GOMP_SIMD_LAST_LANE: + t = gimple_call_arg (stmt, 1); + break; + default: + gcc_unreachable (); + } + tree lhs = gimple_call_lhs (stmt); + if (lhs) + replace_uses_by (lhs, t); + release_defs (stmt); + gsi_remove (&i, true); + } } +} - /* User didn't specify verbosity level: */ - if (dump_file && (dump_flags & TDF_DETAILS)) - vect_verbosity_level = REPORT_DETAILS; - else if (dump_file && (dump_flags & TDF_STATS)) - vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS; - else - vect_verbosity_level = REPORT_NONE; +/* Helper structure for note_simd_array_uses. */ + +struct note_simd_array_uses_struct +{ + hash_table<simd_array_to_simduid> **htab; + unsigned int simduid; +}; + +/* Callback for note_simd_array_uses, called through walk_gimple_op. */ + +static tree +note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + struct note_simd_array_uses_struct *ns + = (struct note_simd_array_uses_struct *) wi->info; - gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); + if (TYPE_P (*tp)) + *walk_subtrees = 0; + else if (VAR_P (*tp) + && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp)) + && DECL_CONTEXT (*tp) == current_function_decl) + { + simd_array_to_simduid data; + if (!*ns->htab) + *ns->htab = new hash_table<simd_array_to_simduid> (15); + data.decl = *tp; + data.simduid = ns->simduid; + simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT); + if (*slot == NULL) + { + simd_array_to_simduid *p = XNEW (simd_array_to_simduid); + *p = data; + *slot = p; + } + else if ((*slot)->simduid != ns->simduid) + (*slot)->simduid = -1U; + *walk_subtrees = 0; + } + return NULL_TREE; } +/* Find "omp simd array" temporaries and map them to corresponding + simduid. */ -/* Function debug_loop_details. +static void +note_simd_array_uses (hash_table<simd_array_to_simduid> **htab) +{ + basic_block bb; + gimple_stmt_iterator gsi; + struct walk_stmt_info wi; + struct note_simd_array_uses_struct ns; + + memset (&wi, 0, sizeof (wi)); + wi.info = &ns; + ns.htab = htab; + + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) + continue; + switch (gimple_call_internal_fn (stmt)) + { + case IFN_GOMP_SIMD_LANE: + case IFN_GOMP_SIMD_VF: + case IFN_GOMP_SIMD_LAST_LANE: + break; + default: + continue; + } + tree lhs = gimple_call_lhs (stmt); + if (lhs == NULL_TREE) + continue; + imm_use_iterator use_iter; + gimple *use_stmt; + ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0))); + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) + if (!is_gimple_debug (use_stmt)) + walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi); + } +} + +/* Shrink arrays with "omp simd array" attribute to the corresponding + vectorization factor. */ - For vectorization debug dumps. */ +static void +shrink_simd_arrays + (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab, + hash_table<simduid_to_vf> *simduid_to_vf_htab) +{ + for (hash_table<simd_array_to_simduid>::iterator iter + = simd_array_to_simduid_htab->begin (); + iter != simd_array_to_simduid_htab->end (); ++iter) + if ((*iter)->simduid != -1U) + { + tree decl = (*iter)->decl; + int vf = 1; + if (simduid_to_vf_htab) + { + simduid_to_vf *p = NULL, data; + data.simduid = (*iter)->simduid; + p = simduid_to_vf_htab->find (&data); + if (p) + vf = p->vf; + } + tree atype + = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf); + TREE_TYPE (decl) = atype; + relayout_decl (decl); + } + + delete simd_array_to_simduid_htab; +} + +/* Initialize the vec_info with kind KIND_IN and target cost data + TARGET_COST_DATA_IN. */ + +vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in) + : kind (kind_in), + datarefs (vNULL), + ddrs (vNULL), + target_cost_data (target_cost_data_in) +{ +} + +vec_info::~vec_info () +{ + slp_instance instance; + struct data_reference *dr; + unsigned int i; + + FOR_EACH_VEC_ELT (datarefs, i, dr) + if (dr->aux) + { + free (dr->aux); + dr->aux = NULL; + } + + FOR_EACH_VEC_ELT (slp_instances, i, instance) + vect_free_slp_instance (instance); + + free_data_refs (datarefs); + free_dependence_relations (ddrs); + destroy_cost_data (target_cost_data); +} + +/* A helper function to free scev and LOOP niter information, as well as + clear loop constraint LOOP_C_FINITE. */ + +void +vect_free_loop_info_assumptions (struct loop *loop) +{ + scev_reset_htab (); + /* We need to explicitly reset upper bound information since they are + used even after free_numbers_of_iterations_estimates. */ + loop->any_upper_bound = false; + loop->any_likely_upper_bound = false; + free_numbers_of_iterations_estimates (loop); + loop_constraint_clear (loop, LOOP_C_FINITE); +} + +/* Return whether STMT is inside the region we try to vectorize. */ bool -vect_print_dump_info (enum vect_verbosity_levels vl) +vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt) { - if (vl > vect_verbosity_level) + if (!gimple_bb (stmt)) return false; - if (!current_function_decl || !vect_dump) - return false; - - if (vect_location == UNKNOWN_LOC) - fprintf (vect_dump, "\n%s:%d: note: ", - DECL_SOURCE_FILE (current_function_decl), - DECL_SOURCE_LINE (current_function_decl)); + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) + { + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + return false; + } else - fprintf (vect_dump, "\n%s:%d: note: ", - LOC_FILE (vect_location), LOC_LINE (vect_location)); + { + bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); + if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo) + || gimple_uid (stmt) == -1U + || gimple_code (stmt) == GIMPLE_PHI) + return false; + } return true; } +/* If LOOP has been versioned during ifcvt, return the internal call + guarding it. */ + +static gimple * +vect_loop_vectorized_call (struct loop *loop) +{ + basic_block bb = loop_preheader_edge (loop)->src; + gimple *g; + do + { + g = last_stmt (bb); + if (g) + break; + if (!single_pred_p (bb)) + break; + bb = single_pred (bb); + } + while (1); + if (g && gimple_code (g) == GIMPLE_COND) + { + gimple_stmt_iterator gsi = gsi_for_stmt (g); + gsi_prev (&gsi); + if (!gsi_end_p (gsi)) + { + g = gsi_stmt (gsi); + if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED) + && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num + || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) + return g; + } + } + return NULL; +} + +/* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS + to VALUE and update any immediate uses of it's LHS. */ + +static void +fold_loop_internal_call (gimple *g, tree value) +{ + tree lhs = gimple_call_lhs (g); + use_operand_p use_p; + imm_use_iterator iter; + gimple *use_stmt; + gimple_stmt_iterator gsi = gsi_for_stmt (g); + + update_call_from_tree (&gsi, value); + FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, value); + update_stmt (use_stmt); + } +} + +/* If LOOP has been versioned during loop distribution, return the gurading + internal call. */ + +static gimple * +vect_loop_dist_alias_call (struct loop *loop) +{ + basic_block bb; + basic_block entry; + struct loop *outer, *orig; + gimple_stmt_iterator gsi; + gimple *g; + + if (loop->orig_loop_num == 0) + return NULL; + + orig = get_loop (cfun, loop->orig_loop_num); + if (orig == NULL) + { + /* The original loop is somehow destroyed. Clear the information. */ + loop->orig_loop_num = 0; + return NULL; + } + + if (loop != orig) + bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header); + else + bb = loop_preheader_edge (loop)->src; + + outer = bb->loop_father; + entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); + + /* Look upward in dominance tree. */ + for (; bb != entry && flow_bb_inside_loop_p (outer, bb); + bb = get_immediate_dominator (CDI_DOMINATORS, bb)) + { + g = last_stmt (bb); + if (g == NULL || gimple_code (g) != GIMPLE_COND) + continue; + + gsi = gsi_for_stmt (g); + gsi_prev (&gsi); + if (gsi_end_p (gsi)) + continue; + + g = gsi_stmt (gsi); + /* The guarding internal function call must have the same distribution + alias id. */ + if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS) + && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num)) + return g; + } + return NULL; +} + +/* Set the uids of all the statements in basic blocks inside loop + represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal + call guarding the loop which has been if converted. */ +static void +set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) +{ + tree arg = gimple_call_arg (loop_vectorized_call, 1); + basic_block *bbs; + unsigned int i; + struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); + + LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; + gcc_checking_assert (vect_loop_vectorized_call (scalar_loop) + == loop_vectorized_call); + /* If we are going to vectorize outer loop, prevent vectorization + of the inner loop in the scalar loop - either the scalar loop is + thrown away, so it is a wasted work, or is used only for + a few iterations. */ + if (scalar_loop->inner) + { + gimple *g = vect_loop_vectorized_call (scalar_loop->inner); + if (g) + { + arg = gimple_call_arg (g, 0); + get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true; + fold_loop_internal_call (g, boolean_false_node); + } + } + bbs = get_loop_body (scalar_loop); + for (i = 0; i < scalar_loop->num_nodes; i++) + { + basic_block bb = bbs[i]; + gimple_stmt_iterator gsi; + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *phi = gsi_stmt (gsi); + gimple_set_uid (phi, 0); + } + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gimple_set_uid (stmt, 0); + } + } + free (bbs); +} + /* Function vectorize_loops. Entry point to loop vectorization phase. */ @@ -171,17 +596,21 @@ unsigned int i; unsigned int num_vectorized_loops = 0; unsigned int vect_loops_num; - loop_iterator li; struct loop *loop; + hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; + hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; + bool any_ifcvt_loops = false; + unsigned ret = 0; + struct loop *new_loop; - vect_loops_num = number_of_loops (); + vect_loops_num = number_of_loops (cfun); /* Bail out if there are no loops. */ if (vect_loops_num <= 1) return 0; - /* Fix the verbosity level if not defined explicitly by the user. */ - vect_set_dump_settings (false); + if (cfun->has_simduid_loops) + note_simd_array_uses (&simd_array_to_simduid_htab); init_stmt_vec_info_vec (); @@ -190,113 +619,405 @@ /* If some loop was duplicated, it gets bigger number than all previously defined loops. This fact allows us to run only over initial loops skipping newly generated ones. */ - FOR_EACH_LOOP (li, loop, 0) - if (optimize_loop_nest_for_speed_p (loop)) + FOR_EACH_LOOP (loop, 0) + if (loop->dont_vectorize) { - loop_vec_info loop_vinfo; + any_ifcvt_loops = true; + /* If-conversion sometimes versions both the outer loop + (for the case when outer loop vectorization might be + desirable) as well as the inner loop in the scalar version + of the loop. So we have: + if (LOOP_VECTORIZED (1, 3)) + { + loop1 + loop2 + } + else + loop3 (copy of loop1) + if (LOOP_VECTORIZED (4, 5)) + loop4 (copy of loop2) + else + loop5 (copy of loop4) + If FOR_EACH_LOOP gives us loop3 first (which has + dont_vectorize set), make sure to process loop1 before loop4; + so that we can prevent vectorization of loop4 if loop1 + is successfully vectorized. */ + if (loop->inner) + { + gimple *loop_vectorized_call + = vect_loop_vectorized_call (loop); + if (loop_vectorized_call + && vect_loop_vectorized_call (loop->inner)) + { + tree arg = gimple_call_arg (loop_vectorized_call, 0); + struct loop *vector_loop + = get_loop (cfun, tree_to_shwi (arg)); + if (vector_loop && vector_loop != loop) + { + loop = vector_loop; + /* Make sure we don't vectorize it twice. */ + loop->dont_vectorize = true; + goto try_vectorize; + } + } + } + } + else + { + loop_vec_info loop_vinfo, orig_loop_vinfo; + gimple *loop_vectorized_call, *loop_dist_alias_call; + try_vectorize: + if (!((flag_tree_loop_vectorize + && optimize_loop_nest_for_speed_p (loop)) + || loop->force_vectorize)) + continue; + orig_loop_vinfo = NULL; + loop_vectorized_call = vect_loop_vectorized_call (loop); + loop_dist_alias_call = vect_loop_dist_alias_call (loop); + vectorize_epilogue: + vect_location = find_loop_location (loop); + if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION + && dump_enabled_p ()) + dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", + LOCATION_FILE (vect_location), + LOCATION_LINE (vect_location)); - vect_location = find_loop_location (loop); - loop_vinfo = vect_analyze_loop (loop); + loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo); loop->aux = loop_vinfo; if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) - continue; + { + /* Free existing information if loop is analyzed with some + assumptions. */ + if (loop_constraint_set_p (loop, LOOP_C_FINITE)) + vect_free_loop_info_assumptions (loop); + + /* If we applied if-conversion then try to vectorize the + BB of innermost loops. + ??? Ideally BB vectorization would learn to vectorize + control flow by applying if-conversion on-the-fly, the + following retains the if-converted loop body even when + only non-if-converted parts took part in BB vectorization. */ + if (flag_tree_slp_vectorize != 0 + && loop_vectorized_call + && ! loop->inner) + { + basic_block bb = loop->header; + bool has_mask_load_store = false; + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) + && gimple_call_internal_p (stmt) + && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD + || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) + { + has_mask_load_store = true; + break; + } + gimple_set_uid (stmt, -1); + gimple_set_visited (stmt, false); + } + if (! has_mask_load_store && vect_slp_bb (bb)) + { + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "basic block vectorized\n"); + fold_loop_internal_call (loop_vectorized_call, + boolean_true_node); + loop_vectorized_call = NULL; + ret |= TODO_cleanup_cfg; + } + } + /* If outer loop vectorization fails for LOOP_VECTORIZED guarded + loop, don't vectorize its inner loop; we'll attempt to + vectorize LOOP_VECTORIZED guarded inner loop of the scalar + loop version. */ + if (loop_vectorized_call && loop->inner) + loop->inner->dont_vectorize = true; + continue; + } - vect_transform_loop (loop_vinfo); + if (!dbg_cnt (vect_loop)) + { + /* We may miss some if-converted loops due to + debug counter. Set any_ifcvt_loops to visit + them at finalization. */ + any_ifcvt_loops = true; + /* Free existing information if loop is analyzed with some + assumptions. */ + if (loop_constraint_set_p (loop, LOOP_C_FINITE)) + vect_free_loop_info_assumptions (loop); + + break; + } + + if (loop_vectorized_call) + set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); + if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION + && dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "loop vectorized\n"); + new_loop = vect_transform_loop (loop_vinfo); num_vectorized_loops++; + /* Now that the loop has been vectorized, allow it to be unrolled + etc. */ + loop->force_vectorize = false; + + if (loop->simduid) + { + simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); + if (!simduid_to_vf_htab) + simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); + simduid_to_vf_data->simduid = DECL_UID (loop->simduid); + simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; + *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) + = simduid_to_vf_data; + } + + if (loop_vectorized_call) + { + fold_loop_internal_call (loop_vectorized_call, boolean_true_node); + loop_vectorized_call = NULL; + ret |= TODO_cleanup_cfg; + } + if (loop_dist_alias_call) + { + tree value = gimple_call_arg (loop_dist_alias_call, 1); + fold_loop_internal_call (loop_dist_alias_call, value); + loop_dist_alias_call = NULL; + ret |= TODO_cleanup_cfg; + } + + if (new_loop) + { + /* Epilogue of vectorized loop must be vectorized too. */ + vect_loops_num = number_of_loops (cfun); + loop = new_loop; + orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */ + goto vectorize_epilogue; + } } - vect_location = UNKNOWN_LOC; + vect_location = UNKNOWN_LOCATION; statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS) - || (num_vectorized_loops > 0 - && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))) - fprintf (vect_dump, "vectorized %u loops in function.\n", - num_vectorized_loops); + if (dump_enabled_p () + || (num_vectorized_loops > 0 && dump_enabled_p ())) + dump_printf_loc (MSG_NOTE, vect_location, + "vectorized %u loops in function.\n", + num_vectorized_loops); /* ----------- Finalize. ----------- */ - mark_sym_for_renaming (gimple_vop (cfun)); + if (any_ifcvt_loops) + for (i = 1; i < vect_loops_num; i++) + { + loop = get_loop (cfun, i); + if (loop && loop->dont_vectorize) + { + gimple *g = vect_loop_vectorized_call (loop); + if (g) + { + fold_loop_internal_call (g, boolean_false_node); + ret |= TODO_cleanup_cfg; + g = NULL; + } + else + g = vect_loop_dist_alias_call (loop); + + if (g) + { + fold_loop_internal_call (g, boolean_false_node); + ret |= TODO_cleanup_cfg; + } + } + } for (i = 1; i < vect_loops_num; i++) { loop_vec_info loop_vinfo; + bool has_mask_store; - loop = get_loop (i); + loop = get_loop (cfun, i); if (!loop) continue; loop_vinfo = (loop_vec_info) loop->aux; - destroy_loop_vec_info (loop_vinfo, true); + has_mask_store = false; + if (loop_vinfo) + has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); + delete loop_vinfo; + if (has_mask_store) + optimize_mask_stores (loop); loop->aux = NULL; } free_stmt_vec_info_vec (); - return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; + /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ + if (cfun->has_simduid_loops) + adjust_simduid_builtins (simduid_to_vf_htab); + + /* Shrink any "omp array simd" temporary arrays to the + actual vectorization factors. */ + if (simd_array_to_simduid_htab) + shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab); + delete simduid_to_vf_htab; + cfun->has_simduid_loops = false; + + if (num_vectorized_loops > 0) + { + /* If we vectorized any loop only virtual SSA form needs to be updated. + ??? Also while we try hard to update loop-closed SSA form we fail + to properly do this in some corner-cases (see PR56286). */ + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); + return TODO_cleanup_cfg; + } + + return ret; +} + + +/* Entry point to the simduid cleanup pass. */ + +namespace { + +const pass_data pass_data_simduid_cleanup = +{ + GIMPLE_PASS, /* type */ + "simduid", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + ( PROP_ssa | PROP_cfg ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_simduid_cleanup : public gimple_opt_pass +{ +public: + pass_simduid_cleanup (gcc::context *ctxt) + : gimple_opt_pass (pass_data_simduid_cleanup, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); } + virtual bool gate (function *fun) { return fun->has_simduid_loops; } + virtual unsigned int execute (function *); + +}; // class pass_simduid_cleanup + +unsigned int +pass_simduid_cleanup::execute (function *fun) +{ + hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; + + note_simd_array_uses (&simd_array_to_simduid_htab); + + /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ + adjust_simduid_builtins (NULL); + + /* Shrink any "omp array simd" temporary arrays to the + actual vectorization factors. */ + if (simd_array_to_simduid_htab) + shrink_simd_arrays (simd_array_to_simduid_htab, NULL); + fun->has_simduid_loops = false; + return 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_simduid_cleanup (gcc::context *ctxt) +{ + return new pass_simduid_cleanup (ctxt); } /* Entry point to basic block SLP phase. */ -static unsigned int -execute_vect_slp (void) +namespace { + +const pass_data pass_data_slp_vectorize = +{ + GIMPLE_PASS, /* type */ + "slp", /* name */ + OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ + TV_TREE_SLP_VECTORIZATION, /* tv_id */ + ( PROP_ssa | PROP_cfg ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_slp_vectorize : public gimple_opt_pass +{ +public: + pass_slp_vectorize (gcc::context *ctxt) + : gimple_opt_pass (pass_data_slp_vectorize, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } + virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } + virtual unsigned int execute (function *); + +}; // class pass_slp_vectorize + +unsigned int +pass_slp_vectorize::execute (function *fun) { basic_block bb; - /* Fix the verbosity level if not defined explicitly by the user. */ - vect_set_dump_settings (true); + bool in_loop_pipeline = scev_initialized_p (); + if (!in_loop_pipeline) + { + loop_optimizer_init (LOOPS_NORMAL); + scev_initialize (); + } + + /* Mark all stmts as not belonging to the current region and unvisited. */ + FOR_EACH_BB_FN (bb, fun) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gimple_set_uid (stmt, -1); + gimple_set_visited (stmt, false); + } + } init_stmt_vec_info_vec (); - FOR_EACH_BB (bb) + FOR_EACH_BB_FN (bb, fun) { - vect_location = find_bb_location (bb); - - if (vect_slp_analyze_bb (bb)) - { - vect_slp_transform_bb (bb); - - if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) - fprintf (vect_dump, "basic block vectorized using SLP\n"); - } + if (vect_slp_bb (bb)) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "basic block vectorized\n"); } free_stmt_vec_info_vec (); + + if (!in_loop_pipeline) + { + scev_finalize (); + loop_optimizer_finalize (); + } + return 0; } -static bool -gate_vect_slp (void) -{ - /* Apply SLP either if the vectorizer is on and the user didn't specify - whether to run SLP or not, or if the SLP flag was set by the user. */ - return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) - || flag_tree_slp_vectorize == 1); -} +} // anon namespace -struct gimple_opt_pass pass_slp_vectorize = +gimple_opt_pass * +make_pass_slp_vectorize (gcc::context *ctxt) { - { - GIMPLE_PASS, - "slp", /* name */ - gate_vect_slp, /* gate */ - execute_vect_slp, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_TREE_SLP_VECTORIZATION, /* tv_id */ - PROP_ssa | PROP_cfg, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_ggc_collect - | TODO_verify_ssa - | TODO_dump_func - | TODO_update_ssa - | TODO_verify_stmts /* todo_flags_finish */ - } -}; + return new pass_slp_vectorize (ctxt); +} /* Increase alignment of global arrays to improve vectorization potential. @@ -306,68 +1027,182 @@ This should involve global alignment analysis and in the future also array padding. */ +static unsigned get_vec_alignment_for_type (tree); +static hash_map<tree, unsigned> *type_align_map; + +/* Return alignment of array's vector type corresponding to scalar type. + 0 if no vector type exists. */ +static unsigned +get_vec_alignment_for_array_type (tree type) +{ + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + + tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); + if (!vectype + || !TYPE_SIZE (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) + return 0; + + return TYPE_ALIGN (vectype); +} + +/* Return alignment of field having maximum alignment of vector type + corresponding to it's scalar type. For now, we only consider fields whose + offset is a multiple of it's vector alignment. + 0 if no suitable field is found. */ +static unsigned +get_vec_alignment_for_record_type (tree type) +{ + gcc_assert (TREE_CODE (type) == RECORD_TYPE); + + unsigned max_align = 0, alignment; + HOST_WIDE_INT offset; + tree offset_tree; + + if (TYPE_PACKED (type)) + return 0; + + unsigned *slot = type_align_map->get (type); + if (slot) + return *slot; + + for (tree field = first_field (type); + field != NULL_TREE; + field = DECL_CHAIN (field)) + { + /* Skip if not FIELD_DECL or if alignment is set by user. */ + if (TREE_CODE (field) != FIELD_DECL + || DECL_USER_ALIGN (field) + || DECL_ARTIFICIAL (field)) + continue; + + /* We don't need to process the type further if offset is variable, + since the offsets of remaining members will also be variable. */ + if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST + || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) + break; + + /* Similarly stop processing the type if offset_tree + does not fit in unsigned HOST_WIDE_INT. */ + offset_tree = bit_position (field); + if (!tree_fits_uhwi_p (offset_tree)) + break; + + offset = tree_to_uhwi (offset_tree); + alignment = get_vec_alignment_for_type (TREE_TYPE (field)); + + /* Get maximum alignment of vectorized field/array among those members + whose offset is multiple of the vector alignment. */ + if (alignment + && (offset % alignment == 0) + && (alignment > max_align)) + max_align = alignment; + } + + type_align_map->put (type, max_align); + return max_align; +} + +/* Return alignment of vector type corresponding to decl's scalar type + or 0 if it doesn't exist or the vector alignment is lesser than + decl's alignment. */ +static unsigned +get_vec_alignment_for_type (tree type) +{ + if (type == NULL_TREE) + return 0; + + gcc_assert (TYPE_P (type)); + + static unsigned alignment = 0; + switch (TREE_CODE (type)) + { + case ARRAY_TYPE: + alignment = get_vec_alignment_for_array_type (type); + break; + case RECORD_TYPE: + alignment = get_vec_alignment_for_record_type (type); + break; + default: + alignment = 0; + break; + } + + return (alignment > TYPE_ALIGN (type)) ? alignment : 0; +} + +/* Entry point to increase_alignment pass. */ static unsigned int increase_alignment (void) { - struct varpool_node *vnode; + varpool_node *vnode; + + vect_location = UNKNOWN_LOCATION; + type_align_map = new hash_map<tree, unsigned>; /* Increase the alignment of all global arrays for vectorization. */ - for (vnode = varpool_nodes_queue; - vnode; - vnode = vnode->next_needed) + FOR_EACH_DEFINED_VARIABLE (vnode) { - tree vectype, decl = vnode->decl; - tree t; + tree decl = vnode->decl; unsigned int alignment; - t = TREE_TYPE(decl); - if (TREE_CODE (t) != ARRAY_TYPE) - continue; - vectype = get_vectype_for_scalar_type (strip_array_types (t)); - if (!vectype) - continue; - alignment = TYPE_ALIGN (vectype); - if (DECL_ALIGN (decl) >= alignment) - continue; + if ((decl_in_symtab_p (decl) + && !symtab_node::get (decl)->can_increase_alignment_p ()) + || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) + continue; - if (vect_can_force_dr_alignment_p (decl, alignment)) + alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); + if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) { - DECL_ALIGN (decl) = TYPE_ALIGN (vectype); - DECL_USER_ALIGN (decl) = 1; - if (dump_file) - { - fprintf (dump_file, "Increasing alignment of decl: "); - print_generic_expr (dump_file, decl, TDF_SLIM); - fprintf (dump_file, "\n"); - } + vnode->increase_alignment (alignment); + dump_printf (MSG_NOTE, "Increasing alignment of decl: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); + dump_printf (MSG_NOTE, "\n"); } } + + delete type_align_map; return 0; } -static bool -gate_increase_alignment (void) +namespace { + +const pass_data pass_data_ipa_increase_alignment = { - return flag_section_anchors && flag_tree_vectorize; -} + SIMPLE_IPA_PASS, /* type */ + "increase_alignment", /* name */ + OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ + TV_IPA_OPT, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; - -struct simple_ipa_opt_pass pass_ipa_increase_alignment = +class pass_ipa_increase_alignment : public simple_ipa_opt_pass { - { - SIMPLE_IPA_PASS, - "increase_alignment", /* name */ - gate_increase_alignment, /* gate */ - increase_alignment, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_IPA_OPT, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ - } -}; +public: + pass_ipa_increase_alignment (gcc::context *ctxt) + : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_section_anchors && flag_tree_loop_vectorize; + } + + virtual unsigned int execute (function *) { return increase_alignment (); } + +}; // class pass_ipa_increase_alignment + +} // anon namespace + +simple_ipa_opt_pass * +make_pass_ipa_increase_alignment (gcc::context *ctxt) +{ + return new pass_ipa_increase_alignment (ctxt); +}