comparison gcc/tree-parloops.c @ 132:d34655255c78

update gcc-8.2
author mir3636
date Thu, 25 Oct 2018 10:21:07 +0900
parents 84e7813d76e9
children 1830386684a0
comparison
equal deleted inserted replaced
130:e108057fa461 132:d34655255c78
1 /* Loop autoparallelization. 1 /* Loop autoparallelization.
2 Copyright (C) 2006-2017 Free Software Foundation, Inc. 2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>. 4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
182 182
183 */ 183 */
184 184
185 /* Minimal number of iterations of a loop that should be executed in each 185 /* Minimal number of iterations of a loop that should be executed in each
186 thread. */ 186 thread. */
187 #define MIN_PER_THREAD 100 187 #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)
188 188
189 /* Element of the hashtable, representing a 189 /* Element of the hashtable, representing a
190 reduction in the current loop. */ 190 reduction in the current loop. */
191 struct reduction_info 191 struct reduction_info
192 { 192 {
2233 /* After the above dom info is hosed. Re-compute it. */ 2233 /* After the above dom info is hosed. Re-compute it. */
2234 free_dominance_info (CDI_DOMINATORS); 2234 free_dominance_info (CDI_DOMINATORS);
2235 calculate_dominance_info (CDI_DOMINATORS); 2235 calculate_dominance_info (CDI_DOMINATORS);
2236 } 2236 }
2237 2237
2238 /* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
2239 virtual phi. */
2240
2241 static unsigned int
2242 num_phis (basic_block bb, bool count_virtual_p)
2243 {
2244 unsigned int nr_phis = 0;
2245 gphi_iterator gsi;
2246 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2247 {
2248 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2249 continue;
2250
2251 nr_phis++;
2252 }
2253
2254 return nr_phis;
2255 }
2256
2238 /* Generates code to execute the iterations of LOOP in N_THREADS 2257 /* Generates code to execute the iterations of LOOP in N_THREADS
2239 threads in parallel, which can be 0 if that number is to be determined 2258 threads in parallel, which can be 0 if that number is to be determined
2240 later. 2259 later.
2241 2260
2242 NITER describes number of iterations of LOOP. 2261 NITER describes number of iterations of LOOP.
2334 m_p_thread=MIN_PER_THREAD; 2353 m_p_thread=MIN_PER_THREAD;
2335 2354
2336 gcc_checking_assert (n_threads != 0); 2355 gcc_checking_assert (n_threads != 0);
2337 many_iterations_cond = 2356 many_iterations_cond =
2338 fold_build2 (GE_EXPR, boolean_type_node, 2357 fold_build2 (GE_EXPR, boolean_type_node,
2339 nit, build_int_cst (type, m_p_thread * n_threads)); 2358 nit, build_int_cst (type, m_p_thread * n_threads - 1));
2340 2359
2341 many_iterations_cond 2360 many_iterations_cond
2342 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, 2361 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2343 invert_truthvalue (unshare_expr (niter->may_be_zero)), 2362 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2344 many_iterations_cond); 2363 many_iterations_cond);
2368 free_original_copy_tables (); 2387 free_original_copy_tables ();
2369 } 2388 }
2370 2389
2371 /* Base all the induction variables in LOOP on a single control one. */ 2390 /* Base all the induction variables in LOOP on a single control one. */
2372 canonicalize_loop_ivs (loop, &nit, true); 2391 canonicalize_loop_ivs (loop, &nit, true);
2392 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
2393 {
2394 /* The call to canonicalize_loop_ivs above failed to "base all the
2395 induction variables in LOOP on a single control one". Do damage
2396 control. */
2397 basic_block preheader = loop_preheader_edge (loop)->src;
2398 basic_block cond_bb = single_pred (preheader);
2399 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
2400 gimple_cond_make_true (cond);
2401 update_stmt (cond);
2402 /* We've gotten rid of the duplicate loop created by loop_version, but
2403 we can't undo whatever canonicalize_loop_ivs has done.
2404 TODO: Fix this properly by ensuring that the call to
2405 canonicalize_loop_ivs succeeds. */
2406 if (dump_file
2407 && (dump_flags & TDF_DETAILS))
2408 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
2409 " aborting transformation\n", loop->num);
2410 return;
2411 }
2373 2412
2374 /* Ensure that the exit condition is the first statement in the loop. 2413 /* Ensure that the exit condition is the first statement in the loop.
2375 The common case is that latch of the loop is empty (apart from the 2414 The common case is that latch of the loop is empty (apart from the
2376 increment) and immediately follows the loop exit test. Attempt to move the 2415 increment) and immediately follows the loop exit test. Attempt to move the
2377 entry of the loop directly before the exit check and increase the number of 2416 entry of the loop directly before the exit check and increase the number of
2529 struct reduction_info *const red = *slot; 2568 struct reduction_info *const red = *slot;
2530 gimple_set_uid (red->reduc_phi, red->reduc_version); 2569 gimple_set_uid (red->reduc_phi, red->reduc_version);
2531 return 1; 2570 return 1;
2532 } 2571 }
2533 2572
2573 /* Return true if the type of reduction performed by STMT_INFO is suitable
2574 for this pass. */
2575
2576 static bool
2577 valid_reduction_p (stmt_vec_info stmt_info)
2578 {
2579 /* Parallelization would reassociate the operation, which isn't
2580 allowed for in-order reductions. */
2581 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
2582 return reduc_type != FOLD_LEFT_REDUCTION;
2583 }
2584
2534 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */ 2585 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2535 2586
2536 static void 2587 static void
2537 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list) 2588 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2538 { 2589 {
2539 gphi_iterator gsi; 2590 gphi_iterator gsi;
2540 loop_vec_info simple_loop_info; 2591 loop_vec_info simple_loop_info;
2541 auto_vec<gphi *, 4> double_reduc_phis; 2592 auto_vec<gphi *, 4> double_reduc_phis;
2542 auto_vec<gimple *, 4> double_reduc_stmts; 2593 auto_vec<gimple *, 4> double_reduc_stmts;
2543 2594
2544 if (!stmt_vec_info_vec.exists ()) 2595 vec_info_shared shared;
2545 init_stmt_vec_info_vec (); 2596 simple_loop_info = vect_analyze_loop_form (loop, &shared);
2546
2547 simple_loop_info = vect_analyze_loop_form (loop);
2548 if (simple_loop_info == NULL) 2597 if (simple_loop_info == NULL)
2549 goto gather_done; 2598 goto gather_done;
2550 2599
2551 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) 2600 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2552 { 2601 {
2559 continue; 2608 continue;
2560 2609
2561 if (simple_iv (loop, loop, res, &iv, true)) 2610 if (simple_iv (loop, loop, res, &iv, true))
2562 continue; 2611 continue;
2563 2612
2564 gimple *reduc_stmt 2613 stmt_vec_info reduc_stmt_info
2565 = vect_force_simple_reduction (simple_loop_info, phi, 2614 = vect_force_simple_reduction (simple_loop_info,
2615 simple_loop_info->lookup_stmt (phi),
2566 &double_reduc, true); 2616 &double_reduc, true);
2567 if (!reduc_stmt) 2617 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
2568 continue; 2618 continue;
2569 2619
2570 if (double_reduc) 2620 if (double_reduc)
2571 { 2621 {
2572 if (loop->inner->inner != NULL) 2622 if (loop->inner->inner != NULL)
2573 continue; 2623 continue;
2574 2624
2575 double_reduc_phis.safe_push (phi); 2625 double_reduc_phis.safe_push (phi);
2576 double_reduc_stmts.safe_push (reduc_stmt); 2626 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
2577 continue; 2627 continue;
2578 } 2628 }
2579 2629
2580 build_new_reduction (reduction_list, reduc_stmt, phi); 2630 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
2581 } 2631 }
2582 delete simple_loop_info; 2632 delete simple_loop_info;
2583 2633
2584 if (!double_reduc_phis.is_empty ()) 2634 if (!double_reduc_phis.is_empty ())
2585 { 2635 {
2586 simple_loop_info = vect_analyze_loop_form (loop->inner); 2636 vec_info_shared shared;
2637 simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
2587 if (simple_loop_info) 2638 if (simple_loop_info)
2588 { 2639 {
2589 gphi *phi; 2640 gphi *phi;
2590 unsigned int i; 2641 unsigned int i;
2591 2642
2604 gphi *inner_phi = as_a <gphi *> (inner_stmt); 2655 gphi *inner_phi = as_a <gphi *> (inner_stmt);
2605 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi), 2656 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2606 &iv, true)) 2657 &iv, true))
2607 continue; 2658 continue;
2608 2659
2609 gimple *inner_reduc_stmt 2660 stmt_vec_info inner_phi_info
2610 = vect_force_simple_reduction (simple_loop_info, inner_phi, 2661 = simple_loop_info->lookup_stmt (inner_phi);
2662 stmt_vec_info inner_reduc_stmt_info
2663 = vect_force_simple_reduction (simple_loop_info,
2664 inner_phi_info,
2611 &double_reduc, true); 2665 &double_reduc, true);
2612 gcc_assert (!double_reduc); 2666 gcc_assert (!double_reduc);
2613 if (inner_reduc_stmt == NULL) 2667 if (!inner_reduc_stmt_info
2668 || !valid_reduction_p (inner_reduc_stmt_info))
2614 continue; 2669 continue;
2615 2670
2616 build_new_reduction (reduction_list, double_reduc_stmts[i], phi); 2671 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
2617 } 2672 }
2618 delete simple_loop_info; 2673 delete simple_loop_info;
2619 } 2674 }
2620 } 2675 }
2621 2676
2622 gather_done: 2677 gather_done:
2623 /* Release the claim on gimple_uid. */
2624 free_stmt_vec_info_vec ();
2625
2626 if (reduction_list->elements () == 0) 2678 if (reduction_list->elements () == 0)
2627 return; 2679 return;
2628 2680
2629 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form 2681 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2630 and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only 2682 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
2631 now. */ 2683 now. */
2632 basic_block bb; 2684 basic_block bb;
2633 FOR_EACH_BB_FN (bb, cfun) 2685 FOR_EACH_BB_FN (bb, cfun)
2634 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2686 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2635 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1); 2687 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3022 } 3074 }
3023 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN) 3075 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3024 continue; 3076 continue;
3025 else if (!gimple_has_side_effects (stmt) 3077 else if (!gimple_has_side_effects (stmt)
3026 && !gimple_could_trap_p (stmt) 3078 && !gimple_could_trap_p (stmt)
3027 && !stmt_could_throw_p (stmt) 3079 && !stmt_could_throw_p (cfun, stmt)
3028 && !gimple_vdef (stmt) 3080 && !gimple_vdef (stmt)
3029 && !gimple_vuse (stmt)) 3081 && !gimple_vuse (stmt))
3030 continue; 3082 continue;
3031 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS)) 3083 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3032 continue; 3084 continue;
3228 struct loop *loop; 3280 struct loop *loop;
3229 struct loop *skip_loop = NULL; 3281 struct loop *skip_loop = NULL;
3230 struct tree_niter_desc niter_desc; 3282 struct tree_niter_desc niter_desc;
3231 struct obstack parloop_obstack; 3283 struct obstack parloop_obstack;
3232 HOST_WIDE_INT estimated; 3284 HOST_WIDE_INT estimated;
3233 source_location loop_loc;
3234 3285
3235 /* Do not parallelize loops in the functions created by parallelization. */ 3286 /* Do not parallelize loops in the functions created by parallelization. */
3236 if (!oacc_kernels_p 3287 if (!oacc_kernels_p
3237 && parallelized_function_p (cfun->decl)) 3288 && parallelized_function_p (cfun->decl))
3238 return false; 3289 return false;
3297 fprintf (dump_file, "loop %d is not innermost\n",loop->num); 3348 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
3298 else 3349 else
3299 fprintf (dump_file, "loop %d is innermost\n",loop->num); 3350 fprintf (dump_file, "loop %d is innermost\n",loop->num);
3300 } 3351 }
3301 3352
3302 /* If we use autopar in graphite pass, we use its marked dependency
3303 checking results. */
3304 if (flag_loop_parallelize_all && !loop->can_be_parallel)
3305 {
3306 if (dump_file && (dump_flags & TDF_DETAILS))
3307 fprintf (dump_file, "loop is not parallel according to graphite\n");
3308 continue;
3309 }
3310
3311 if (!single_dom_exit (loop)) 3353 if (!single_dom_exit (loop))
3312 { 3354 {
3313 3355
3314 if (dump_file && (dump_flags & TDF_DETAILS)) 3356 if (dump_file && (dump_flags & TDF_DETAILS))
3315 fprintf (dump_file, "loop is !single_dom_exit\n"); 3357 fprintf (dump_file, "loop is !single_dom_exit\n");
3323 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP) 3365 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
3324 /* FIXME: the check for vector phi nodes could be removed. */ 3366 /* FIXME: the check for vector phi nodes could be removed. */
3325 || loop_has_vector_phi_nodes (loop)) 3367 || loop_has_vector_phi_nodes (loop))
3326 continue; 3368 continue;
3327 3369
3328 estimated = estimated_stmt_executions_int (loop); 3370 estimated = estimated_loop_iterations_int (loop);
3329 if (estimated == -1) 3371 if (estimated == -1)
3330 estimated = likely_max_stmt_executions_int (loop); 3372 estimated = get_likely_max_loop_iterations_int (loop);
3331 /* FIXME: Bypass this check as graphite doesn't update the 3373 /* FIXME: Bypass this check as graphite doesn't update the
3332 count and frequency correctly now. */ 3374 count and frequency correctly now. */
3333 if (!flag_loop_parallelize_all 3375 if (!flag_loop_parallelize_all
3334 && !oacc_kernels_p 3376 && !oacc_kernels_p
3335 && ((estimated != -1 3377 && ((estimated != -1
3336 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD) 3378 && (estimated
3379 < ((HOST_WIDE_INT) n_threads
3380 * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
3337 /* Do not bother with loops in cold areas. */ 3381 /* Do not bother with loops in cold areas. */
3338 || optimize_loop_nest_for_size_p (loop))) 3382 || optimize_loop_nest_for_size_p (loop)))
3339 continue; 3383 continue;
3340 3384
3341 if (!try_get_loop_niter (loop, &niter_desc)) 3385 if (!try_get_loop_niter (loop, &niter_desc))
3345 continue; 3389 continue;
3346 3390
3347 if (loop_has_phi_with_address_arg (loop)) 3391 if (loop_has_phi_with_address_arg (loop))
3348 continue; 3392 continue;
3349 3393
3350 if (!flag_loop_parallelize_all 3394 if (!loop->can_be_parallel
3351 && !loop_parallel_p (loop, &parloop_obstack)) 3395 && !loop_parallel_p (loop, &parloop_obstack))
3352 continue; 3396 continue;
3353 3397
3354 if (oacc_kernels_p 3398 if (oacc_kernels_p
3355 && !oacc_entry_exit_ok (loop, &reduction_list)) 3399 && !oacc_entry_exit_ok (loop, &reduction_list))
3360 } 3404 }
3361 3405
3362 changed = true; 3406 changed = true;
3363 skip_loop = loop->inner; 3407 skip_loop = loop->inner;
3364 3408
3365 loop_loc = find_loop_location (loop); 3409 dump_user_location_t loop_loc = find_loop_location (loop);
3366 if (loop->inner) 3410 if (loop->inner)
3367 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, 3411 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3368 "parallelizing outer loop %d\n", loop->num); 3412 "parallelizing outer loop %d\n", loop->num);
3369 else 3413 else
3370 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc, 3414 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,