Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-parloops.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
130:e108057fa461 | 132:d34655255c78 |
---|---|
1 /* Loop autoparallelization. | 1 /* Loop autoparallelization. |
2 Copyright (C) 2006-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2006-2018 Free Software Foundation, Inc. |
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> | 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> |
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>. | 4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
182 | 182 |
183 */ | 183 */ |
184 | 184 |
185 /* Minimal number of iterations of a loop that should be executed in each | 185 /* Minimal number of iterations of a loop that should be executed in each |
186 thread. */ | 186 thread. */ |
187 #define MIN_PER_THREAD 100 | 187 #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD) |
188 | 188 |
189 /* Element of the hashtable, representing a | 189 /* Element of the hashtable, representing a |
190 reduction in the current loop. */ | 190 reduction in the current loop. */ |
191 struct reduction_info | 191 struct reduction_info |
192 { | 192 { |
2233 /* After the above dom info is hosed. Re-compute it. */ | 2233 /* After the above dom info is hosed. Re-compute it. */ |
2234 free_dominance_info (CDI_DOMINATORS); | 2234 free_dominance_info (CDI_DOMINATORS); |
2235 calculate_dominance_info (CDI_DOMINATORS); | 2235 calculate_dominance_info (CDI_DOMINATORS); |
2236 } | 2236 } |
2237 | 2237 |
2238 /* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the | |
2239 virtual phi. */ | |
2240 | |
2241 static unsigned int | |
2242 num_phis (basic_block bb, bool count_virtual_p) | |
2243 { | |
2244 unsigned int nr_phis = 0; | |
2245 gphi_iterator gsi; | |
2246 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2247 { | |
2248 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ()))) | |
2249 continue; | |
2250 | |
2251 nr_phis++; | |
2252 } | |
2253 | |
2254 return nr_phis; | |
2255 } | |
2256 | |
2238 /* Generates code to execute the iterations of LOOP in N_THREADS | 2257 /* Generates code to execute the iterations of LOOP in N_THREADS |
2239 threads in parallel, which can be 0 if that number is to be determined | 2258 threads in parallel, which can be 0 if that number is to be determined |
2240 later. | 2259 later. |
2241 | 2260 |
2242 NITER describes number of iterations of LOOP. | 2261 NITER describes number of iterations of LOOP. |
2334 m_p_thread=MIN_PER_THREAD; | 2353 m_p_thread=MIN_PER_THREAD; |
2335 | 2354 |
2336 gcc_checking_assert (n_threads != 0); | 2355 gcc_checking_assert (n_threads != 0); |
2337 many_iterations_cond = | 2356 many_iterations_cond = |
2338 fold_build2 (GE_EXPR, boolean_type_node, | 2357 fold_build2 (GE_EXPR, boolean_type_node, |
2339 nit, build_int_cst (type, m_p_thread * n_threads)); | 2358 nit, build_int_cst (type, m_p_thread * n_threads - 1)); |
2340 | 2359 |
2341 many_iterations_cond | 2360 many_iterations_cond |
2342 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, | 2361 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
2343 invert_truthvalue (unshare_expr (niter->may_be_zero)), | 2362 invert_truthvalue (unshare_expr (niter->may_be_zero)), |
2344 many_iterations_cond); | 2363 many_iterations_cond); |
2368 free_original_copy_tables (); | 2387 free_original_copy_tables (); |
2369 } | 2388 } |
2370 | 2389 |
2371 /* Base all the induction variables in LOOP on a single control one. */ | 2390 /* Base all the induction variables in LOOP on a single control one. */ |
2372 canonicalize_loop_ivs (loop, &nit, true); | 2391 canonicalize_loop_ivs (loop, &nit, true); |
2392 if (num_phis (loop->header, false) != reduction_list->elements () + 1) | |
2393 { | |
2394 /* The call to canonicalize_loop_ivs above failed to "base all the | |
2395 induction variables in LOOP on a single control one". Do damage | |
2396 control. */ | |
2397 basic_block preheader = loop_preheader_edge (loop)->src; | |
2398 basic_block cond_bb = single_pred (preheader); | |
2399 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb))); | |
2400 gimple_cond_make_true (cond); | |
2401 update_stmt (cond); | |
2402 /* We've gotten rid of the duplicate loop created by loop_version, but | |
2403 we can't undo whatever canonicalize_loop_ivs has done. | |
2404 TODO: Fix this properly by ensuring that the call to | |
2405 canonicalize_loop_ivs succeeds. */ | |
2406 if (dump_file | |
2407 && (dump_flags & TDF_DETAILS)) | |
2408 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d," | |
2409 " aborting transformation\n", loop->num); | |
2410 return; | |
2411 } | |
2373 | 2412 |
2374 /* Ensure that the exit condition is the first statement in the loop. | 2413 /* Ensure that the exit condition is the first statement in the loop. |
2375 The common case is that latch of the loop is empty (apart from the | 2414 The common case is that latch of the loop is empty (apart from the |
2376 increment) and immediately follows the loop exit test. Attempt to move the | 2415 increment) and immediately follows the loop exit test. Attempt to move the |
2377 entry of the loop directly before the exit check and increase the number of | 2416 entry of the loop directly before the exit check and increase the number of |
2529 struct reduction_info *const red = *slot; | 2568 struct reduction_info *const red = *slot; |
2530 gimple_set_uid (red->reduc_phi, red->reduc_version); | 2569 gimple_set_uid (red->reduc_phi, red->reduc_version); |
2531 return 1; | 2570 return 1; |
2532 } | 2571 } |
2533 | 2572 |
2573 /* Return true if the type of reduction performed by STMT_INFO is suitable | |
2574 for this pass. */ | |
2575 | |
2576 static bool | |
2577 valid_reduction_p (stmt_vec_info stmt_info) | |
2578 { | |
2579 /* Parallelization would reassociate the operation, which isn't | |
2580 allowed for in-order reductions. */ | |
2581 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info); | |
2582 return reduc_type != FOLD_LEFT_REDUCTION; | |
2583 } | |
2584 | |
2534 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */ | 2585 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */ |
2535 | 2586 |
2536 static void | 2587 static void |
2537 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list) | 2588 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list) |
2538 { | 2589 { |
2539 gphi_iterator gsi; | 2590 gphi_iterator gsi; |
2540 loop_vec_info simple_loop_info; | 2591 loop_vec_info simple_loop_info; |
2541 auto_vec<gphi *, 4> double_reduc_phis; | 2592 auto_vec<gphi *, 4> double_reduc_phis; |
2542 auto_vec<gimple *, 4> double_reduc_stmts; | 2593 auto_vec<gimple *, 4> double_reduc_stmts; |
2543 | 2594 |
2544 if (!stmt_vec_info_vec.exists ()) | 2595 vec_info_shared shared; |
2545 init_stmt_vec_info_vec (); | 2596 simple_loop_info = vect_analyze_loop_form (loop, &shared); |
2546 | |
2547 simple_loop_info = vect_analyze_loop_form (loop); | |
2548 if (simple_loop_info == NULL) | 2597 if (simple_loop_info == NULL) |
2549 goto gather_done; | 2598 goto gather_done; |
2550 | 2599 |
2551 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) | 2600 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) |
2552 { | 2601 { |
2559 continue; | 2608 continue; |
2560 | 2609 |
2561 if (simple_iv (loop, loop, res, &iv, true)) | 2610 if (simple_iv (loop, loop, res, &iv, true)) |
2562 continue; | 2611 continue; |
2563 | 2612 |
2564 gimple *reduc_stmt | 2613 stmt_vec_info reduc_stmt_info |
2565 = vect_force_simple_reduction (simple_loop_info, phi, | 2614 = vect_force_simple_reduction (simple_loop_info, |
2615 simple_loop_info->lookup_stmt (phi), | |
2566 &double_reduc, true); | 2616 &double_reduc, true); |
2567 if (!reduc_stmt) | 2617 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info)) |
2568 continue; | 2618 continue; |
2569 | 2619 |
2570 if (double_reduc) | 2620 if (double_reduc) |
2571 { | 2621 { |
2572 if (loop->inner->inner != NULL) | 2622 if (loop->inner->inner != NULL) |
2573 continue; | 2623 continue; |
2574 | 2624 |
2575 double_reduc_phis.safe_push (phi); | 2625 double_reduc_phis.safe_push (phi); |
2576 double_reduc_stmts.safe_push (reduc_stmt); | 2626 double_reduc_stmts.safe_push (reduc_stmt_info->stmt); |
2577 continue; | 2627 continue; |
2578 } | 2628 } |
2579 | 2629 |
2580 build_new_reduction (reduction_list, reduc_stmt, phi); | 2630 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi); |
2581 } | 2631 } |
2582 delete simple_loop_info; | 2632 delete simple_loop_info; |
2583 | 2633 |
2584 if (!double_reduc_phis.is_empty ()) | 2634 if (!double_reduc_phis.is_empty ()) |
2585 { | 2635 { |
2586 simple_loop_info = vect_analyze_loop_form (loop->inner); | 2636 vec_info_shared shared; |
2637 simple_loop_info = vect_analyze_loop_form (loop->inner, &shared); | |
2587 if (simple_loop_info) | 2638 if (simple_loop_info) |
2588 { | 2639 { |
2589 gphi *phi; | 2640 gphi *phi; |
2590 unsigned int i; | 2641 unsigned int i; |
2591 | 2642 |
2604 gphi *inner_phi = as_a <gphi *> (inner_stmt); | 2655 gphi *inner_phi = as_a <gphi *> (inner_stmt); |
2605 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi), | 2656 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi), |
2606 &iv, true)) | 2657 &iv, true)) |
2607 continue; | 2658 continue; |
2608 | 2659 |
2609 gimple *inner_reduc_stmt | 2660 stmt_vec_info inner_phi_info |
2610 = vect_force_simple_reduction (simple_loop_info, inner_phi, | 2661 = simple_loop_info->lookup_stmt (inner_phi); |
2662 stmt_vec_info inner_reduc_stmt_info | |
2663 = vect_force_simple_reduction (simple_loop_info, | |
2664 inner_phi_info, | |
2611 &double_reduc, true); | 2665 &double_reduc, true); |
2612 gcc_assert (!double_reduc); | 2666 gcc_assert (!double_reduc); |
2613 if (inner_reduc_stmt == NULL) | 2667 if (!inner_reduc_stmt_info |
2668 || !valid_reduction_p (inner_reduc_stmt_info)) | |
2614 continue; | 2669 continue; |
2615 | 2670 |
2616 build_new_reduction (reduction_list, double_reduc_stmts[i], phi); | 2671 build_new_reduction (reduction_list, double_reduc_stmts[i], phi); |
2617 } | 2672 } |
2618 delete simple_loop_info; | 2673 delete simple_loop_info; |
2619 } | 2674 } |
2620 } | 2675 } |
2621 | 2676 |
2622 gather_done: | 2677 gather_done: |
2623 /* Release the claim on gimple_uid. */ | |
2624 free_stmt_vec_info_vec (); | |
2625 | |
2626 if (reduction_list->elements () == 0) | 2678 if (reduction_list->elements () == 0) |
2627 return; | 2679 return; |
2628 | 2680 |
2629 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form | 2681 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form |
2630 and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only | 2682 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only |
2631 now. */ | 2683 now. */ |
2632 basic_block bb; | 2684 basic_block bb; |
2633 FOR_EACH_BB_FN (bb, cfun) | 2685 FOR_EACH_BB_FN (bb, cfun) |
2634 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 2686 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2635 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1); | 2687 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1); |
3022 } | 3074 } |
3023 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN) | 3075 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN) |
3024 continue; | 3076 continue; |
3025 else if (!gimple_has_side_effects (stmt) | 3077 else if (!gimple_has_side_effects (stmt) |
3026 && !gimple_could_trap_p (stmt) | 3078 && !gimple_could_trap_p (stmt) |
3027 && !stmt_could_throw_p (stmt) | 3079 && !stmt_could_throw_p (cfun, stmt) |
3028 && !gimple_vdef (stmt) | 3080 && !gimple_vdef (stmt) |
3029 && !gimple_vuse (stmt)) | 3081 && !gimple_vuse (stmt)) |
3030 continue; | 3082 continue; |
3031 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS)) | 3083 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS)) |
3032 continue; | 3084 continue; |
3228 struct loop *loop; | 3280 struct loop *loop; |
3229 struct loop *skip_loop = NULL; | 3281 struct loop *skip_loop = NULL; |
3230 struct tree_niter_desc niter_desc; | 3282 struct tree_niter_desc niter_desc; |
3231 struct obstack parloop_obstack; | 3283 struct obstack parloop_obstack; |
3232 HOST_WIDE_INT estimated; | 3284 HOST_WIDE_INT estimated; |
3233 source_location loop_loc; | |
3234 | 3285 |
3235 /* Do not parallelize loops in the functions created by parallelization. */ | 3286 /* Do not parallelize loops in the functions created by parallelization. */ |
3236 if (!oacc_kernels_p | 3287 if (!oacc_kernels_p |
3237 && parallelized_function_p (cfun->decl)) | 3288 && parallelized_function_p (cfun->decl)) |
3238 return false; | 3289 return false; |
3297 fprintf (dump_file, "loop %d is not innermost\n",loop->num); | 3348 fprintf (dump_file, "loop %d is not innermost\n",loop->num); |
3298 else | 3349 else |
3299 fprintf (dump_file, "loop %d is innermost\n",loop->num); | 3350 fprintf (dump_file, "loop %d is innermost\n",loop->num); |
3300 } | 3351 } |
3301 | 3352 |
3302 /* If we use autopar in graphite pass, we use its marked dependency | |
3303 checking results. */ | |
3304 if (flag_loop_parallelize_all && !loop->can_be_parallel) | |
3305 { | |
3306 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3307 fprintf (dump_file, "loop is not parallel according to graphite\n"); | |
3308 continue; | |
3309 } | |
3310 | |
3311 if (!single_dom_exit (loop)) | 3353 if (!single_dom_exit (loop)) |
3312 { | 3354 { |
3313 | 3355 |
3314 if (dump_file && (dump_flags & TDF_DETAILS)) | 3356 if (dump_file && (dump_flags & TDF_DETAILS)) |
3315 fprintf (dump_file, "loop is !single_dom_exit\n"); | 3357 fprintf (dump_file, "loop is !single_dom_exit\n"); |
3323 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP) | 3365 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP) |
3324 /* FIXME: the check for vector phi nodes could be removed. */ | 3366 /* FIXME: the check for vector phi nodes could be removed. */ |
3325 || loop_has_vector_phi_nodes (loop)) | 3367 || loop_has_vector_phi_nodes (loop)) |
3326 continue; | 3368 continue; |
3327 | 3369 |
3328 estimated = estimated_stmt_executions_int (loop); | 3370 estimated = estimated_loop_iterations_int (loop); |
3329 if (estimated == -1) | 3371 if (estimated == -1) |
3330 estimated = likely_max_stmt_executions_int (loop); | 3372 estimated = get_likely_max_loop_iterations_int (loop); |
3331 /* FIXME: Bypass this check as graphite doesn't update the | 3373 /* FIXME: Bypass this check as graphite doesn't update the |
3332 count and frequency correctly now. */ | 3374 count and frequency correctly now. */ |
3333 if (!flag_loop_parallelize_all | 3375 if (!flag_loop_parallelize_all |
3334 && !oacc_kernels_p | 3376 && !oacc_kernels_p |
3335 && ((estimated != -1 | 3377 && ((estimated != -1 |
3336 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD) | 3378 && (estimated |
3379 < ((HOST_WIDE_INT) n_threads | |
3380 * (loop->inner ? 2 : MIN_PER_THREAD) - 1))) | |
3337 /* Do not bother with loops in cold areas. */ | 3381 /* Do not bother with loops in cold areas. */ |
3338 || optimize_loop_nest_for_size_p (loop))) | 3382 || optimize_loop_nest_for_size_p (loop))) |
3339 continue; | 3383 continue; |
3340 | 3384 |
3341 if (!try_get_loop_niter (loop, &niter_desc)) | 3385 if (!try_get_loop_niter (loop, &niter_desc)) |
3345 continue; | 3389 continue; |
3346 | 3390 |
3347 if (loop_has_phi_with_address_arg (loop)) | 3391 if (loop_has_phi_with_address_arg (loop)) |
3348 continue; | 3392 continue; |
3349 | 3393 |
3350 if (!flag_loop_parallelize_all | 3394 if (!loop->can_be_parallel |
3351 && !loop_parallel_p (loop, &parloop_obstack)) | 3395 && !loop_parallel_p (loop, &parloop_obstack)) |
3352 continue; | 3396 continue; |
3353 | 3397 |
3354 if (oacc_kernels_p | 3398 if (oacc_kernels_p |
3355 && !oacc_entry_exit_ok (loop, &reduction_list)) | 3399 && !oacc_entry_exit_ok (loop, &reduction_list)) |
3360 } | 3404 } |
3361 | 3405 |
3362 changed = true; | 3406 changed = true; |
3363 skip_loop = loop->inner; | 3407 skip_loop = loop->inner; |
3364 | 3408 |
3365 loop_loc = find_loop_location (loop); | 3409 dump_user_location_t loop_loc = find_loop_location (loop); |
3366 if (loop->inner) | 3410 if (loop->inner) |
3367 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, | 3411 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, |
3368 "parallelizing outer loop %d\n", loop->num); | 3412 "parallelizing outer loop %d\n", loop->num); |
3369 else | 3413 else |
3370 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, | 3414 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, |