comparison gcc/omp-expand.c @ 146:351920fa3827
merge
author:   anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date:     Sun, 01 Mar 2020 16:13:28 +0900
parents:  1830386684a0
children: (none)
comparing 144:8f4e72ab4e11 (old) with 146:351920fa3827 (new)
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP | 1 /* Expansion pass for OMP directives. Outlines regions of certain OMP |
2 directives to separate functions, converts others into explicit calls to the | 2 directives to separate functions, converts others into explicit calls to the |
3 runtime library (libgomp) and so forth | 3 runtime library (libgomp) and so forth |
4 | 4 |
5 Copyright (C) 2005-2018 Free Software Foundation, Inc. | 5 Copyright (C) 2005-2020 Free Software Foundation, Inc. |
6 | 6 |
7 This file is part of GCC. | 7 This file is part of GCC. |
8 | 8 |
9 GCC is free software; you can redistribute it and/or modify it under | 9 GCC is free software; you can redistribute it and/or modify it under |
10 the terms of the GNU General Public License as published by the Free | 10 the terms of the GNU General Public License as published by the Free |
50 #include "splay-tree.h" | 50 #include "splay-tree.h" |
51 #include "cfgloop.h" | 51 #include "cfgloop.h" |
52 #include "omp-general.h" | 52 #include "omp-general.h" |
53 #include "omp-offload.h" | 53 #include "omp-offload.h" |
54 #include "tree-cfgcleanup.h" | 54 #include "tree-cfgcleanup.h" |
55 #include "alloc-pool.h" | |
55 #include "symbol-summary.h" | 56 #include "symbol-summary.h" |
56 #include "gomp-constants.h" | 57 #include "gomp-constants.h" |
57 #include "gimple-pretty-print.h" | 58 #include "gimple-pretty-print.h" |
58 #include "hsa-common.h" | 59 #include "hsa-common.h" |
59 #include "debug.h" | |
60 #include "stringpool.h" | 60 #include "stringpool.h" |
61 #include "attribs.h" | 61 #include "attribs.h" |
62 | 62 |
63 /* OMP region information. Every parallel and workshare | 63 /* OMP region information. Every parallel and workshare |
64 directive is enclosed between two markers, the OMP_* directive | 64 directive is enclosed between two markers, the OMP_* directive |
98 /* Schedule modifiers. */ | 98 /* Schedule modifiers. */ |
99 unsigned char sched_modifiers; | 99 unsigned char sched_modifiers; |
100 | 100 |
101 /* True if this is a combined parallel+workshare region. */ | 101 /* True if this is a combined parallel+workshare region. */ |
102 bool is_combined_parallel; | 102 bool is_combined_parallel; |
103 | |
104 /* Copy of fd.lastprivate_conditional != 0. */ | |
105 bool has_lastprivate_conditional; | |
103 | 106 |
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has | 107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has |
105 a depend clause. */ | 108 a depend clause. */ |
106 gomp_ordered *ord_stmt; | 109 gomp_ordered *ord_stmt; |
107 }; | 110 }; |
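
The new has_lastprivate_conditional flag caches fd.lastprivate_conditional != 0 so later expansion steps can tell whether the loop carries an OpenMP 5.0 lastprivate(conditional:) clause. A minimal sketch of the user-level construct this tracks (illustrative code with hypothetical a and n, not part of this file):

    int last_positive (int *a, int n)
    {
      int x = -1;
      /* With the conditional modifier, x receives the value from the
         last iteration in which the assignment actually executed.  */
    #pragma omp parallel for lastprivate(conditional: x)
      for (int i = 0; i < n; i++)
        if (a[i] > 0)
          x = a[i];
      return x;
    }
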
172 | 175 |
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) | 176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) |
174 return true; | 177 return true; |
175 | 178 |
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); | 179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); |
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR) | |
181 return false; | |
177 | 182 |
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); | 183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); |
179 | 184 |
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) | 185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) |
181 return false; | 186 return false; |
200 presence (SIMD_SCHEDULE). */ | 205 presence (SIMD_SCHEDULE). */ |
201 | 206 |
202 static tree | 207 static tree |
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) | 208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) |
204 { | 209 { |
205 if (!simd_schedule) | 210 if (!simd_schedule || integer_zerop (chunk_size)) |
206 return chunk_size; | 211 return chunk_size; |
207 | 212 |
208 poly_uint64 vf = omp_max_vf (); | 213 poly_uint64 vf = omp_max_vf (); |
209 if (known_eq (vf, 1U)) | 214 if (known_eq (vf, 1U)) |
210 return chunk_size; | 215 return chunk_size; |
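
The added integer_zerop test returns a zero chunk size untouched, presumably because zero stands for "no explicit chunk" and rounding it up would materialize one. For non-zero chunks under schedule(simd: ...) the function rounds up to a multiple of the maximum vectorization factor; a plain-C sketch of that rounding, assuming a fixed power-of-two vf in place of the poly_uint64 returned by omp_max_vf ():

    static unsigned long
    simd_adjust_chunk (unsigned long chunk, unsigned long vf)
    {
      if (chunk == 0)                        /* leave "no chunk" alone */
        return chunk;
      return (chunk + vf - 1) & ~(vf - 1);   /* round up to multiple of vf */
    }
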
307 WS_EXIT_BB -> PAR_EXIT_BB. */ | 312 WS_EXIT_BB -> PAR_EXIT_BB. */ |
308 par_entry_bb = region->entry; | 313 par_entry_bb = region->entry; |
309 par_exit_bb = region->exit; | 314 par_exit_bb = region->exit; |
310 ws_entry_bb = region->inner->entry; | 315 ws_entry_bb = region->inner->entry; |
311 ws_exit_bb = region->inner->exit; | 316 ws_exit_bb = region->inner->exit; |
317 | |
318 /* Give up for task reductions on the parallel; while it is implementable, |
319 adding another big set of APIs or slowing down the normal paths is | |
320 not acceptable. */ | |
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb)); | |
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_)) | |
323 return; | |
312 | 324 |
313 if (single_succ (par_entry_bb) == ws_entry_bb | 325 if (single_succ (par_entry_bb) == ws_entry_bb |
314 && single_succ (ws_exit_bb) == par_exit_bb | 326 && single_succ (ws_exit_bb) == par_exit_bb |
315 && workshare_safe_to_combine_p (ws_entry_bb) | 327 && workshare_safe_to_combine_p (ws_entry_bb) |
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) | 328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) |
334 tree clauses = gimple_omp_for_clauses (ws_stmt); | 346 tree clauses = gimple_omp_for_clauses (ws_stmt); |
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); | 347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); |
336 if (c == NULL | 348 if (c == NULL |
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) | 349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) |
338 == OMP_CLAUSE_SCHEDULE_STATIC) | 350 == OMP_CLAUSE_SCHEDULE_STATIC) |
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)) | 351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED) |
340 { | 352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_) |
341 region->is_combined_parallel = false; | 353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_)) |
342 region->inner->is_combined_parallel = false; | 354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))) |
343 return; | 355 return; |
344 } | 356 } |
345 } | 357 else if (region->inner->type == GIMPLE_OMP_SECTIONS |
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt), | |
359 OMP_CLAUSE__REDUCTEMP_) | |
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt), | |
361 OMP_CLAUSE__CONDTEMP_))) | |
362 return; | |
346 | 363 |
347 region->is_combined_parallel = true; | 364 region->is_combined_parallel = true; |
348 region->inner->is_combined_parallel = true; | 365 region->inner->is_combined_parallel = true; |
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt); | 366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt); |
350 } | 367 } |
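
The new early returns make the combined parallel+workshare optimization bail out when the parallel carries a task-reduction temporary (OMP_CLAUSE__REDUCTEMP_) or the workshare needs a pointer-typed conditional-lastprivate buffer (OMP_CLAUSE__CONDTEMP_), since the combined GOMP_parallel_loop_*/GOMP_parallel_sections entry points have no way to pass those through. Illustrative user code (hypothetical f, n, sum) that is therefore expanded as separate library calls rather than one combined one:

    extern int f (int);
    int sum_tasks (int n)
    {
      int sum = 0;
      /* The task reduction modifier forces the non-combined expansion.  */
    #pragma omp parallel for schedule(dynamic) reduction(task, +: sum)
      for (int i = 0; i < n; i++)
        sum += f (i);
      return sum;
    }
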
505 | 522 |
506 Together, these actions ensure that the debug info for the outlined | 523 Together, these actions ensure that the debug info for the outlined |
507 function will be emitted with the correct lexical scope. */ | 524 function will be emitted with the correct lexical scope. */ |
508 | 525 |
509 static void | 526 static void |
510 adjust_context_and_scope (tree entry_block, tree child_fndecl) | 527 adjust_context_and_scope (struct omp_region *region, tree entry_block, |
528 tree child_fndecl) | |
511 { | 529 { |
530 tree parent_fndecl = NULL_TREE; | |
531 gimple *entry_stmt; | |
532 /* OMP expansion expands inner regions before outer ones, so if | |
533 we e.g. have explicit task region nested in parallel region, when | |
534 expanding the task region current_function_decl will be the original | |
535 source function, but we actually want to use as context the child | |
536 function of the parallel. */ | |
537 for (region = region->outer; | |
538 region && parent_fndecl == NULL_TREE; region = region->outer) | |
539 switch (region->type) | |
540 { | |
541 case GIMPLE_OMP_PARALLEL: | |
542 case GIMPLE_OMP_TASK: | |
543 case GIMPLE_OMP_TEAMS: | |
544 entry_stmt = last_stmt (region->entry); | |
545 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt); | |
546 break; | |
547 case GIMPLE_OMP_TARGET: | |
548 entry_stmt = last_stmt (region->entry); | |
549 parent_fndecl | |
550 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt)); | |
551 break; | |
552 default: | |
553 break; | |
554 } | |
555 | |
556 if (parent_fndecl == NULL_TREE) | |
557 parent_fndecl = current_function_decl; | |
558 DECL_CONTEXT (child_fndecl) = parent_fndecl; | |
559 | |
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK) | 560 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK) |
513 { | 561 { |
514 tree b = BLOCK_SUPERCONTEXT (entry_block); | 562 tree b = BLOCK_SUPERCONTEXT (entry_block); |
515 | |
516 if (TREE_CODE (b) == BLOCK) | 563 if (TREE_CODE (b) == BLOCK) |
517 { | 564 { |
518 tree parent_fndecl; | |
519 | |
520 /* Follow supercontext chain until the parent fndecl | |
521 is found. */ | |
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b); | |
523 TREE_CODE (parent_fndecl) == BLOCK; | |
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl)) | |
525 ; | |
526 | |
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL); | |
528 | |
529 DECL_CONTEXT (child_fndecl) = parent_fndecl; | |
530 | |
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b); | 565 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b); |
532 BLOCK_VARS (b) = child_fndecl; | 566 BLOCK_VARS (b) = child_fndecl; |
533 } | 567 } |
534 } | 568 } |
535 } | 569 } |
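
adjust_context_and_scope now receives the region tree and walks region->outer to find the innermost enclosing parallel/task/teams/target child function, because inner regions are outlined before outer ones and current_function_decl would still name the original source function. Illustrative nesting (hypothetical code; GCC names outlined bodies in the style foo._omp_fn.N):

    extern void bar (void);
    void foo (void)
    {
    #pragma omp parallel          /* body outlined into, say, foo._omp_fn.0 */
      {
    #pragma omp task              /* its child fn gets DECL_CONTEXT
                                     foo._omp_fn.0, not foo, via the walk above */
        bar ();
      }
    }
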
536 | 570 |
537 /* Build the function calls to GOMP_parallel_start etc to actually | 571 /* Build the function calls to GOMP_parallel etc to actually |
538 generate the parallel operation. REGION is the parallel region | 572 generate the parallel operation. REGION is the parallel region |
539 being expanded. BB is the block where to insert the code. WS_ARGS | 573 being expanded. BB is the block where to insert the code. WS_ARGS |
540 will be set if this is a call to a combined parallel+workshare | 574 will be set if this is a call to a combined parallel+workshare |
541 construct, it contains the list of additional arguments needed by | 575 construct, it contains the list of additional arguments needed by |
542 the workshare construct. */ | 576 the workshare construct. */ |
557 clauses = gimple_omp_parallel_clauses (entry_stmt); | 591 clauses = gimple_omp_parallel_clauses (entry_stmt); |
558 | 592 |
559 /* Determine what flavor of GOMP_parallel we will be | 593 /* Determine what flavor of GOMP_parallel we will be |
560 emitting. */ | 594 emitting. */ |
561 start_ix = BUILT_IN_GOMP_PARALLEL; | 595 start_ix = BUILT_IN_GOMP_PARALLEL; |
562 if (is_combined_parallel (region)) | 596 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); |
597 if (rtmp) | |
598 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS; | |
599 else if (is_combined_parallel (region)) | |
563 { | 600 { |
564 switch (region->inner->type) | 601 switch (region->inner->type) |
565 { | 602 { |
566 case GIMPLE_OMP_FOR: | 603 case GIMPLE_OMP_FOR: |
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); | 604 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); |
568 switch (region->inner->sched_kind) | 605 switch (region->inner->sched_kind) |
569 { | 606 { |
570 case OMP_CLAUSE_SCHEDULE_RUNTIME: | 607 case OMP_CLAUSE_SCHEDULE_RUNTIME: |
571 start_ix2 = 3; | 608 /* For lastprivate(conditional:), our implementation |
609 requires monotonic behavior. */ | |
610 if (region->inner->has_lastprivate_conditional != 0) | |
611 start_ix2 = 3; | |
612 else if ((region->inner->sched_modifiers | |
613 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) | |
614 start_ix2 = 6; | |
615 else if ((region->inner->sched_modifiers | |
616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0) | |
617 start_ix2 = 7; | |
618 else | |
619 start_ix2 = 3; | |
572 break; | 620 break; |
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC: | 621 case OMP_CLAUSE_SCHEDULE_DYNAMIC: |
574 case OMP_CLAUSE_SCHEDULE_GUIDED: | 622 case OMP_CLAUSE_SCHEDULE_GUIDED: |
575 if (region->inner->sched_modifiers | 623 if ((region->inner->sched_modifiers |
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) | 624 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 |
625 && !region->inner->has_lastprivate_conditional) | |
577 { | 626 { |
578 start_ix2 = 3 + region->inner->sched_kind; | 627 start_ix2 = 3 + region->inner->sched_kind; |
579 break; | 628 break; |
580 } | 629 } |
581 /* FALLTHRU */ | 630 /* FALLTHRU */ |
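
The reworked runtime case implements OpenMP 5.0 monotonicity: without a modifier, dynamic/guided/runtime schedules may be nonmonotonic, an explicit nonmonotonic modifier gets its own entry point, and lastprivate(conditional:) forces the monotonic path. A summary of the offsets chosen above (start_ix2 is later added to the first GOMP_parallel_loop_* builtin, outside this hunk):

    /* schedule(runtime) + lastprivate(conditional:)        -> 3 (monotonic)
       schedule(nonmonotonic: runtime)                      -> 6
       schedule(runtime), no modifier (maybe nonmonotonic)  -> 7
       schedule(monotonic: runtime)                         -> 3
       schedule(nonmonotonic-or-unmodified: dynamic/guided),
         no lastprivate(conditional:)                       -> 3 + sched_kind
       otherwise                                            -> fall through  */
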
701 else | 750 else |
702 t1 = build_fold_addr_expr (t); | 751 t1 = build_fold_addr_expr (t); |
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); | 752 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); |
704 t2 = build_fold_addr_expr (child_fndecl); | 753 t2 = build_fold_addr_expr (child_fndecl); |
705 | 754 |
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl); | |
707 | |
708 vec_alloc (args, 4 + vec_safe_length (ws_args)); | 755 vec_alloc (args, 4 + vec_safe_length (ws_args)); |
709 args->quick_push (t2); | 756 args->quick_push (t2); |
710 args->quick_push (t1); | 757 args->quick_push (t1); |
711 args->quick_push (val); | 758 args->quick_push (val); |
712 if (ws_args) | 759 if (ws_args) |
714 args->quick_push (flags); | 761 args->quick_push (flags); |
715 | 762 |
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, | 763 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, |
717 builtin_decl_explicit (start_ix), args); | 764 builtin_decl_explicit (start_ix), args); |
718 | 765 |
766 if (rtmp) | |
767 { | |
768 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp)); | |
769 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp), | |
770 fold_convert (type, | |
771 fold_convert (pointer_sized_int_node, t))); | |
772 } | |
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | 773 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, |
720 false, GSI_CONTINUE_LINKING); | 774 false, GSI_CONTINUE_LINKING); |
721 | 775 |
722 if (hsa_gen_requested_p () | 776 if (hsa_gen_requested_p () |
723 && parallel_needs_hsa_kernel_p (region)) | 777 && parallel_needs_hsa_kernel_p (region)) |
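
When a _reductemp_ clause is present, the expansion now selects BUILT_IN_GOMP_PARALLEL_REDUCTIONS and, per the rtmp block above, stores the call's return value into the reduction temporary after converting it through a pointer-sized integer. A rough pseudo-C rendering of the emitted GIMPLE (names are illustrative, not taken from the source):

    /* OMP_CLAUSE_DECL (rtmp) receives the runtime's reduction
       bookkeeping value from the call.  */
    __tmp = GOMP_parallel_reductions (fn, &data, num_threads, flags);
    rtmp  = (__typeof (rtmp)) (uintptr_t) __tmp;
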
790 if (ifc == NULL_TREE) | 844 if (ifc == NULL_TREE) |
791 iflags |= GOMP_TASK_FLAG_IF; | 845 iflags |= GOMP_TASK_FLAG_IF; |
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) | 846 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) |
793 iflags |= GOMP_TASK_FLAG_NOGROUP; | 847 iflags |= GOMP_TASK_FLAG_NOGROUP; |
794 ull = fd.iter_type == long_long_unsigned_type_node; | 848 ull = fd.iter_type == long_long_unsigned_type_node; |
849 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION)) | |
850 iflags |= GOMP_TASK_FLAG_REDUCTION; | |
795 } | 851 } |
796 else if (priority) | 852 else if (priority) |
797 iflags |= GOMP_TASK_FLAG_PRIORITY; | 853 iflags |= GOMP_TASK_FLAG_PRIORITY; |
798 | 854 |
799 tree flags = build_int_cst (unsigned_type_node, iflags); | 855 tree flags = build_int_cst (unsigned_type_node, iflags); |
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), | 915 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), |
860 9, t1, t2, t3, | 916 9, t1, t2, t3, |
861 gimple_omp_task_arg_size (entry_stmt), | 917 gimple_omp_task_arg_size (entry_stmt), |
862 gimple_omp_task_arg_align (entry_stmt), cond, flags, | 918 gimple_omp_task_arg_align (entry_stmt), cond, flags, |
863 depend, priority); | 919 depend, priority); |
920 | |
921 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | |
922 false, GSI_CONTINUE_LINKING); | |
923 } | |
924 | |
925 /* Build the function call to GOMP_taskwait_depend to actually | |
926 generate the taskwait operation. BB is the block where to insert the | |
927 code. */ | |
928 | |
929 static void | |
930 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt) | |
931 { | |
932 tree clauses = gimple_omp_task_clauses (entry_stmt); | |
933 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); | |
934 if (depend == NULL_TREE) | |
935 return; | |
936 | |
937 depend = OMP_CLAUSE_DECL (depend); | |
938 | |
939 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); | |
940 tree t | |
941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND), | |
942 1, depend); | |
943 | |
944 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | |
945 false, GSI_CONTINUE_LINKING); | |
946 } | |
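
expand_taskwait_call handles the OpenMP 5.0 form of taskwait with depend clauses, lowering it to a single GOMP_taskwait_depend call that blocks only until sibling tasks with matching dependences complete. Illustrative user code (hypothetical produce/use):

    extern int produce (void);
    extern void use (int);
    void f (void)
    {
      int x = 0;
    #pragma omp parallel shared(x)
    #pragma omp single
      {
    #pragma omp task depend(out: x)
        x = produce ();
        /* Waits only for tasks depending on x, not all children:
           lowered to GOMP_taskwait_depend (&depend_vec).  */
    #pragma omp taskwait depend(in: x)
        use (x);
      }
    }
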
947 | |
948 /* Build the function call to GOMP_teams_reg to actually | |
949 generate the host teams operation. REGION is the teams region | |
950 being expanded. BB is the block where to insert the code. */ | |
951 | |
952 static void | |
953 expand_teams_call (basic_block bb, gomp_teams *entry_stmt) | |
954 { | |
955 tree clauses = gimple_omp_teams_clauses (entry_stmt); | |
956 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); | |
957 if (num_teams == NULL_TREE) | |
958 num_teams = build_int_cst (unsigned_type_node, 0); | |
959 else | |
960 { | |
961 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams); | |
962 num_teams = fold_convert (unsigned_type_node, num_teams); | |
963 } | |
964 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); | |
965 if (thread_limit == NULL_TREE) | |
966 thread_limit = build_int_cst (unsigned_type_node, 0); | |
967 else | |
968 { | |
969 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit); | |
970 thread_limit = fold_convert (unsigned_type_node, thread_limit); | |
971 } | |
972 | |
973 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); | |
974 tree t = gimple_omp_teams_data_arg (entry_stmt), t1; | |
975 if (t == NULL) | |
976 t1 = null_pointer_node; | |
977 else | |
978 t1 = build_fold_addr_expr (t); | |
979 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt); | |
980 tree t2 = build_fold_addr_expr (child_fndecl); | |
981 | |
982 vec<tree, va_gc> *args; | |
983 vec_alloc (args, 5); | |
984 args->quick_push (t2); | |
985 args->quick_push (t1); | |
986 args->quick_push (num_teams); | |
987 args->quick_push (thread_limit); | |
988 /* For future extensibility. */ | |
989 args->quick_push (build_zero_cst (unsigned_type_node)); | |
990 | |
991 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, | |
992 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG), | |
993 args); | |
864 | 994 |
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | 995 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, |
866 false, GSI_CONTINUE_LINKING); | 996 false, GSI_CONTINUE_LINKING); |
867 } | 997 } |
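
expand_teams_call covers host teams: in OpenMP 5.0 a teams construct no longer has to be nested in target, and outside of target its body is outlined like a parallel and started through GOMP_teams_reg. Illustrative source and the approximate call shape (the trailing zero is the reserved flags argument pushed "for future extensibility" above):

    extern void work (void);
    void f (void)
    {
    #pragma omp teams num_teams(4) thread_limit(8)
      work ();
    }
    /* roughly: GOMP_teams_reg (f._omp_fn.N, &.omp_data_o, 4, 8, 0);
       unspecified num_teams/thread_limit are passed as 0.  */
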
868 | 998 |
1110 gimple *entry_stmt, *stmt; | 1240 gimple *entry_stmt, *stmt; |
1111 edge e; | 1241 edge e; |
1112 vec<tree, va_gc> *ws_args; | 1242 vec<tree, va_gc> *ws_args; |
1113 | 1243 |
1114 entry_stmt = last_stmt (region->entry); | 1244 entry_stmt = last_stmt (region->entry); |
1245 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK | |
1246 && gimple_omp_task_taskwait_p (entry_stmt)) | |
1247 { | |
1248 new_bb = region->entry; | |
1249 gsi = gsi_last_nondebug_bb (region->entry); | |
1250 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); | |
1251 gsi_remove (&gsi, true); | |
1252 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt)); | |
1253 return; | |
1254 } | |
1255 | |
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt); | 1256 child_fn = gimple_omp_taskreg_child_fn (entry_stmt); |
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn); | 1257 child_cfun = DECL_STRUCT_FUNCTION (child_fn); |
1117 | 1258 |
1118 entry_bb = region->entry; | 1259 entry_bb = region->entry; |
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) | 1260 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) |
1135 | 1276 |
1136 entry_succ_e = single_succ_edge (entry_bb); | 1277 entry_succ_e = single_succ_edge (entry_bb); |
1137 | 1278 |
1138 gsi = gsi_last_nondebug_bb (entry_bb); | 1279 gsi = gsi_last_nondebug_bb (entry_bb); |
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL | 1280 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL |
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); | 1281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK |
1282 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS); | |
1141 gsi_remove (&gsi, true); | 1283 gsi_remove (&gsi, true); |
1142 | 1284 |
1143 new_bb = entry_bb; | 1285 new_bb = entry_bb; |
1144 if (exit_bb) | 1286 if (exit_bb) |
1145 { | 1287 { |
1188 | 1330 |
1189 /* We're ignoring the subcode because we're | 1331 /* We're ignoring the subcode because we're |
1190 effectively doing a STRIP_NOPS. */ | 1332 effectively doing a STRIP_NOPS. */ |
1191 | 1333 |
1192 if (TREE_CODE (arg) == ADDR_EXPR | 1334 if (TREE_CODE (arg) == ADDR_EXPR |
1193 && TREE_OPERAND (arg, 0) | 1335 && (TREE_OPERAND (arg, 0) |
1194 == gimple_omp_taskreg_data_arg (entry_stmt)) | 1336 == gimple_omp_taskreg_data_arg (entry_stmt))) |
1195 { | 1337 { |
1196 parcopy_stmt = stmt; | 1338 parcopy_stmt = stmt; |
1197 break; | 1339 break; |
1198 } | 1340 } |
1199 } | 1341 } |
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, | 1391 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, |
1250 so that it can be moved to the child function. */ | 1392 so that it can be moved to the child function. */ |
1251 gsi = gsi_last_nondebug_bb (entry_bb); | 1393 gsi = gsi_last_nondebug_bb (entry_bb); |
1252 stmt = gsi_stmt (gsi); | 1394 stmt = gsi_stmt (gsi); |
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL | 1395 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL |
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK)); | 1396 || gimple_code (stmt) == GIMPLE_OMP_TASK |
1397 || gimple_code (stmt) == GIMPLE_OMP_TEAMS)); | |
1255 e = split_block (entry_bb, stmt); | 1398 e = split_block (entry_bb, stmt); |
1256 gsi_remove (&gsi, true); | 1399 gsi_remove (&gsi, true); |
1257 entry_bb = e->dest; | 1400 entry_bb = e->dest; |
1258 edge e2 = NULL; | 1401 edge e2 = NULL; |
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) | 1402 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK) |
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; | 1403 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; |
1261 else | 1404 else |
1262 { | 1405 { |
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); | 1406 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); |
1264 gcc_assert (e2->dest == region->exit); | 1407 gcc_assert (e2->dest == region->exit); |
1292 block = NULL_TREE; | 1435 block = NULL_TREE; |
1293 } | 1436 } |
1294 else | 1437 else |
1295 block = gimple_block (entry_stmt); | 1438 block = gimple_block (entry_stmt); |
1296 | 1439 |
1297 /* Make sure to generate early debug for the function before | |
1298 outlining anything. */ | |
1299 if (! gimple_in_ssa_p (cfun)) | |
1300 (*debug_hooks->early_global_decl) (cfun->decl); | |
1301 | |
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); | 1440 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); |
1303 if (exit_bb) | 1441 if (exit_bb) |
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; | 1442 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; |
1305 if (e2) | 1443 if (e2) |
1306 { | 1444 { |
1377 dump_function_header (dump_file, child_fn, dump_flags); | 1515 dump_function_header (dump_file, child_fn, dump_flags); |
1378 dump_function_to_file (child_fn, dump_file, dump_flags); | 1516 dump_function_to_file (child_fn, dump_file, dump_flags); |
1379 } | 1517 } |
1380 } | 1518 } |
1381 | 1519 |
1520 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); | |
1521 | |
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) | 1522 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) |
1383 expand_parallel_call (region, new_bb, | 1523 expand_parallel_call (region, new_bb, |
1384 as_a <gomp_parallel *> (entry_stmt), ws_args); | 1524 as_a <gomp_parallel *> (entry_stmt), ws_args); |
1525 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS) | |
1526 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt)); | |
1385 else | 1527 else |
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); | 1528 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); |
1387 if (gimple_in_ssa_p (cfun)) | 1529 if (gimple_in_ssa_p (cfun)) |
1388 update_ssa (TODO_update_ssa_only_virtuals); | 1530 update_ssa (TODO_update_ssa_only_virtuals); |
1389 } | 1531 } |
1945 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, | 2087 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, |
1946 false, GSI_CONTINUE_LINKING); | 2088 false, GSI_CONTINUE_LINKING); |
1947 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); | 2089 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); |
1948 stmt = gimple_build_cond_empty (t); | 2090 stmt = gimple_build_cond_empty (t); |
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); | 2091 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
2092 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)), | |
2093 expand_omp_regimplify_p, NULL, NULL) | |
2094 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)), | |
2095 expand_omp_regimplify_p, NULL, NULL)) | |
2096 gimple_regimplify_operands (stmt, &gsi); | |
1950 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); | 2097 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); |
1951 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); | 2098 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); |
1952 } | 2099 } |
1953 else | 2100 else |
1954 make_edge (bb, body_bb, EDGE_FALLTHRU); | 2101 make_edge (bb, body_bb, EDGE_FALLTHRU); |
2011 if (TREE_CODE (step) != INTEGER_CST) | 2158 if (TREE_CODE (step) != INTEGER_CST) |
2012 break; | 2159 break; |
2013 forward = tree_int_cst_sgn (step) != -1; | 2160 forward = tree_int_cst_sgn (step) != -1; |
2014 } | 2161 } |
2015 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) | 2162 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) |
2016 warning_at (loc, 0, "%<depend(sink)%> clause waiting for " | 2163 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " |
2017 "lexically later iteration"); | 2164 "waiting for lexically later iteration"); |
2018 break; | 2165 break; |
2019 } | 2166 } |
2020 deps = TREE_CHAIN (deps); | 2167 deps = TREE_CHAIN (deps); |
2021 } | 2168 } |
2022 /* If all offsets corresponding to the collapsed loops are zero, | 2169 /* If all offsets corresponding to the collapsed loops are zero, |
2148 orig_off ? orig_off : off, s); | 2295 orig_off ? orig_off : off, s); |
2149 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, | 2296 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, |
2150 build_int_cst (itype, 0)); | 2297 build_int_cst (itype, 0)); |
2151 if (integer_zerop (t) && !warned_step) | 2298 if (integer_zerop (t) && !warned_step) |
2152 { | 2299 { |
2153 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " | 2300 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " |
2154 "in the iteration space"); | 2301 "refers to iteration never in the iteration " |
2302 "space"); | |
2155 warned_step = true; | 2303 warned_step = true; |
2156 } | 2304 } |
2157 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, | 2305 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, |
2158 cond, t); | 2306 cond, t); |
2159 } | 2307 } |
2367 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); | 2515 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); |
2368 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); | 2516 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); |
2369 | 2517 |
2370 if (e2) | 2518 if (e2) |
2371 { | 2519 { |
2372 struct loop *loop = alloc_loop (); | 2520 class loop *loop = alloc_loop (); |
2373 loop->header = new_header; | 2521 loop->header = new_header; |
2374 loop->latch = e2->src; | 2522 loop->latch = e2->src; |
2375 add_loop (loop, body_bb->loop_father); | 2523 add_loop (loop, body_bb->loop_father); |
2376 } | 2524 } |
2377 } | 2525 } |
2497 static void | 2645 static void |
2498 expand_omp_for_generic (struct omp_region *region, | 2646 expand_omp_for_generic (struct omp_region *region, |
2499 struct omp_for_data *fd, | 2647 struct omp_for_data *fd, |
2500 enum built_in_function start_fn, | 2648 enum built_in_function start_fn, |
2501 enum built_in_function next_fn, | 2649 enum built_in_function next_fn, |
2650 tree sched_arg, | |
2502 gimple *inner_stmt) | 2651 gimple *inner_stmt) |
2503 { | 2652 { |
2504 tree type, istart0, iend0, iend; | 2653 tree type, istart0, iend0, iend; |
2505 tree t, vmain, vback, bias = NULL_TREE; | 2654 tree t, vmain, vback, bias = NULL_TREE; |
2506 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; | 2655 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; |
2541 | 2690 |
2542 gsi = gsi_last_nondebug_bb (entry_bb); | 2691 gsi = gsi_last_nondebug_bb (entry_bb); |
2543 | 2692 |
2544 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); | 2693 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); |
2545 if (fd->ordered | 2694 if (fd->ordered |
2546 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), | 2695 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), |
2547 OMP_CLAUSE_LASTPRIVATE)) | 2696 OMP_CLAUSE_LASTPRIVATE)) |
2548 ordered_lastprivate = false; | 2697 ordered_lastprivate = false; |
2698 tree reductions = NULL_TREE; | |
2699 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE; | |
2700 tree memv = NULL_TREE; | |
2701 if (fd->lastprivate_conditional) | |
2702 { | |
2703 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | |
2704 OMP_CLAUSE__CONDTEMP_); | |
2705 if (fd->have_pointer_condtemp) | |
2706 condtemp = OMP_CLAUSE_DECL (c); | |
2707 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); | |
2708 cond_var = OMP_CLAUSE_DECL (c); | |
2709 } | |
2710 if (sched_arg) | |
2711 { | |
2712 if (fd->have_reductemp) | |
2713 { | |
2714 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | |
2715 OMP_CLAUSE__REDUCTEMP_); | |
2716 reductions = OMP_CLAUSE_DECL (c); | |
2717 gcc_assert (TREE_CODE (reductions) == SSA_NAME); | |
2718 gimple *g = SSA_NAME_DEF_STMT (reductions); | |
2719 reductions = gimple_assign_rhs1 (g); | |
2720 OMP_CLAUSE_DECL (c) = reductions; | |
2721 entry_bb = gimple_bb (g); | |
2722 edge e = split_block (entry_bb, g); | |
2723 if (region->entry == entry_bb) | |
2724 region->entry = e->dest; | |
2725 gsi = gsi_last_bb (entry_bb); | |
2726 } | |
2727 else | |
2728 reductions = null_pointer_node; | |
2729 if (fd->have_pointer_condtemp) | |
2730 { | |
2731 tree type = TREE_TYPE (condtemp); | |
2732 memv = create_tmp_var (type); | |
2733 TREE_ADDRESSABLE (memv) = 1; | |
2734 unsigned HOST_WIDE_INT sz | |
2735 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); | |
2736 sz *= fd->lastprivate_conditional; | |
2737 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), | |
2738 false); | |
2739 mem = build_fold_addr_expr (memv); | |
2740 } | |
2741 else | |
2742 mem = null_pointer_node; | |
2743 } | |
2549 if (fd->collapse > 1 || fd->ordered) | 2744 if (fd->collapse > 1 || fd->ordered) |
2550 { | 2745 { |
2551 int first_zero_iter1 = -1, first_zero_iter2 = -1; | 2746 int first_zero_iter1 = -1, first_zero_iter2 = -1; |
2552 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; | 2747 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; |
2553 | 2748 |
2730 { | 2925 { |
2731 if (fd->chunk_size) | 2926 if (fd->chunk_size) |
2732 { | 2927 { |
2733 t = fold_convert (fd->iter_type, fd->chunk_size); | 2928 t = fold_convert (fd->iter_type, fd->chunk_size); |
2734 t = omp_adjust_chunk_size (t, fd->simd_schedule); | 2929 t = omp_adjust_chunk_size (t, fd->simd_schedule); |
2735 if (fd->ordered) | 2930 if (sched_arg) |
2931 { | |
2932 if (fd->ordered) | |
2933 t = build_call_expr (builtin_decl_explicit (start_fn), | |
2934 8, t0, t1, sched_arg, t, t3, t4, | |
2935 reductions, mem); | |
2936 else | |
2937 t = build_call_expr (builtin_decl_explicit (start_fn), | |
2938 9, t0, t1, t2, sched_arg, t, t3, t4, | |
2939 reductions, mem); | |
2940 } | |
2941 else if (fd->ordered) | |
2736 t = build_call_expr (builtin_decl_explicit (start_fn), | 2942 t = build_call_expr (builtin_decl_explicit (start_fn), |
2737 5, t0, t1, t, t3, t4); | 2943 5, t0, t1, t, t3, t4); |
2738 else | 2944 else |
2739 t = build_call_expr (builtin_decl_explicit (start_fn), | 2945 t = build_call_expr (builtin_decl_explicit (start_fn), |
2740 6, t0, t1, t2, t, t3, t4); | 2946 6, t0, t1, t2, t, t3, t4); |
2763 if (fd->chunk_size) | 2969 if (fd->chunk_size) |
2764 { | 2970 { |
2765 tree bfn_decl = builtin_decl_explicit (start_fn); | 2971 tree bfn_decl = builtin_decl_explicit (start_fn); |
2766 t = fold_convert (fd->iter_type, fd->chunk_size); | 2972 t = fold_convert (fd->iter_type, fd->chunk_size); |
2767 t = omp_adjust_chunk_size (t, fd->simd_schedule); | 2973 t = omp_adjust_chunk_size (t, fd->simd_schedule); |
2768 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); | 2974 if (sched_arg) |
2975 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, | |
2976 t, t3, t4, reductions, mem); | |
2977 else | |
2978 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); | |
2769 } | 2979 } |
2770 else | 2980 else |
2771 t = build_call_expr (builtin_decl_explicit (start_fn), | 2981 t = build_call_expr (builtin_decl_explicit (start_fn), |
2772 6, t5, t0, t1, t2, t3, t4); | 2982 6, t5, t0, t1, t2, t3, t4); |
2773 } | 2983 } |
2777 t, build_int_cst (TREE_TYPE (t), 0)); | 2987 t, build_int_cst (TREE_TYPE (t), 0)); |
2778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | 2988 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, |
2779 true, GSI_SAME_STMT); | 2989 true, GSI_SAME_STMT); |
2780 if (arr && !TREE_STATIC (arr)) | 2990 if (arr && !TREE_STATIC (arr)) |
2781 { | 2991 { |
2782 tree clobber = build_constructor (TREE_TYPE (arr), NULL); | 2992 tree clobber = build_clobber (TREE_TYPE (arr)); |
2783 TREE_THIS_VOLATILE (clobber) = 1; | |
2784 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), | 2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), |
2785 GSI_SAME_STMT); | 2994 GSI_SAME_STMT); |
2995 } | |
2996 if (fd->have_pointer_condtemp) | |
2997 expand_omp_build_assign (&gsi, condtemp, memv, false); | |
2998 if (fd->have_reductemp) | |
2999 { | |
3000 gimple *g = gsi_stmt (gsi); | |
3001 gsi_remove (&gsi, true); | |
3002 release_ssa_name (gimple_assign_lhs (g)); | |
3003 | |
3004 entry_bb = region->entry; | |
3005 gsi = gsi_last_nondebug_bb (entry_bb); | |
3006 | |
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); | |
2786 } | 3008 } |
2787 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); | 3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); |
2788 | 3010 |
2789 /* Remove the GIMPLE_OMP_FOR statement. */ | 3011 /* Remove the GIMPLE_OMP_FOR statement. */ |
2790 gsi_remove (&gsi, true); | 3012 gsi_remove (&gsi, true); |
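
Both clobber sites in this file (here and at the loop-end hunk below) switch to build_clobber, a helper that packages the old two-line idiom for marking a variable's storage as dead. For reference, the replaced pattern and its equivalent:

    /* before */
    tree clobber = build_constructor (TREE_TYPE (arr), NULL);
    TREE_THIS_VOLATILE (clobber) = 1;
    /* after: the same empty CONSTRUCTOR with TREE_THIS_VOLATILE set */
    tree clobber = build_clobber (TREE_TYPE (arr));
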
2841 DECL_P (startvar) | 3063 DECL_P (startvar) |
2842 && TREE_ADDRESSABLE (startvar), | 3064 && TREE_ADDRESSABLE (startvar), |
2843 NULL_TREE, false, GSI_CONTINUE_LINKING); | 3065 NULL_TREE, false, GSI_CONTINUE_LINKING); |
2844 assign_stmt = gimple_build_assign (startvar, t); | 3066 assign_stmt = gimple_build_assign (startvar, t); |
2845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | 3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); |
3068 if (cond_var) | |
3069 { | |
3070 tree itype = TREE_TYPE (cond_var); | |
3071 /* For lastprivate(conditional:) itervar, we need some iteration | |
3072 counter that starts at unsigned non-zero and increases. | |
3073 Prefer as few IVs as possible, so if we can use startvar | |
3074 itself, use that, or startvar + constant (those would be | |
3075 incremented with step), and as last resort use the s0 + 1 | |
3076 incremented by 1. */ | |
3077 if ((fd->ordered && fd->collapse == 1) | |
3078 || bias | |
3079 || POINTER_TYPE_P (type) | |
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST | |
3081 || fd->loop.cond_code != LT_EXPR) | |
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0), | |
3083 build_int_cst (itype, 1)); | |
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1) | |
3085 t = fold_convert (itype, t); | |
3086 else | |
3087 { | |
3088 tree c = fold_convert (itype, fd->loop.n1); | |
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); | |
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); | |
3091 } | |
3092 t = force_gimple_operand_gsi (&gsi, t, false, | |
3093 NULL_TREE, false, GSI_CONTINUE_LINKING); | |
3094 assign_stmt = gimple_build_assign (cond_var, t); | |
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | |
3096 } | |
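
A worked illustration of the three start-value cases chosen above (hypothetical loops; itype is the type of cond_var):

    /* for (i = 5;  i < n; i++)   n1 > 0, LT   -> cond_var = i             (starts at 5)
       for (i = -3; i < n; i++)   n1 <= 0      -> cond_var = i + (1 - -3)  (starts at 1)
       ordered/pointer IV/non-constant n1/bias -> cond_var = istart0 + 1   (extra IV)  */

In every case the counter starts at a non-zero unsigned value and only increases, which is what the conditional-lastprivate bookkeeping requires.
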
2846 | 3097 |
2847 t = iend0; | 3098 t = iend0; |
2848 if (fd->ordered && fd->collapse == 1) | 3099 if (fd->ordered && fd->collapse == 1) |
2849 t = fold_build2 (MULT_EXPR, fd->iter_type, t, | 3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t, |
2850 fold_convert (fd->iter_type, fd->loop.step)); | 3101 fold_convert (fd->iter_type, fd->loop.step)); |
3012 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); | 3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); |
3013 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); | 3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); |
3014 vmain = gimple_omp_continue_control_use (cont_stmt); | 3265 vmain = gimple_omp_continue_control_use (cont_stmt); |
3015 vback = gimple_omp_continue_control_def (cont_stmt); | 3266 vback = gimple_omp_continue_control_def (cont_stmt); |
3016 | 3267 |
3268 if (cond_var) | |
3269 { | |
3270 tree itype = TREE_TYPE (cond_var); | |
3271 tree t2; | |
3272 if ((fd->ordered && fd->collapse == 1) | |
3273 || bias | |
3274 || POINTER_TYPE_P (type) | |
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST | |
3276 || fd->loop.cond_code != LT_EXPR) | |
3277 t2 = build_int_cst (itype, 1); | |
3278 else | |
3279 t2 = fold_convert (itype, fd->loop.step); | |
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); | |
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false, | |
3282 NULL_TREE, true, GSI_SAME_STMT); | |
3283 assign_stmt = gimple_build_assign (cond_var, t2); | |
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); | |
3285 } | |
3286 | |
3017 if (!gimple_omp_for_combined_p (fd->for_stmt)) | 3287 if (!gimple_omp_for_combined_p (fd->for_stmt)) |
3018 { | 3288 { |
3019 if (POINTER_TYPE_P (type)) | 3289 if (POINTER_TYPE_P (type)) |
3020 t = fold_build_pointer_plus (vmain, fd->loop.step); | 3290 t = fold_build_pointer_plus (vmain, fd->loop.step); |
3021 else | 3291 else |
3027 assign_stmt = gimple_build_assign (vback, t); | 3297 assign_stmt = gimple_build_assign (vback, t); |
3028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); | 3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); |
3029 | 3299 |
3030 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) | 3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) |
3031 { | 3301 { |
3302 tree tem; | |
3032 if (fd->collapse > 1) | 3303 if (fd->collapse > 1) |
3033 t = fd->loop.v; | 3304 tem = fd->loop.v; |
3034 else | 3305 else |
3035 { | 3306 { |
3036 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), | 3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), |
3037 fd->loops[0].v, fd->loops[0].n1); | 3308 fd->loops[0].v, fd->loops[0].n1); |
3038 t = fold_convert (fd->iter_type, t); | 3309 tem = fold_convert (fd->iter_type, tem); |
3039 } | 3310 } |
3040 tree aref = build4 (ARRAY_REF, fd->iter_type, | 3311 tree aref = build4 (ARRAY_REF, fd->iter_type, |
3041 counts[fd->ordered], size_zero_node, | 3312 counts[fd->ordered], size_zero_node, |
3042 NULL_TREE, NULL_TREE); | 3313 NULL_TREE, NULL_TREE); |
3043 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, | 3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, |
3044 true, GSI_SAME_STMT); | 3315 true, GSI_SAME_STMT); |
3045 expand_omp_build_assign (&gsi, aref, t); | 3316 expand_omp_build_assign (&gsi, aref, tem); |
3046 } | 3317 } |
3047 | 3318 |
3048 t = build2 (fd->loop.cond_code, boolean_type_node, | 3319 t = build2 (fd->loop.cond_code, boolean_type_node, |
3049 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, | 3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, |
3050 iend); | 3321 iend); |
3080 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) | 3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) |
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); | 3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); |
3082 else | 3353 else |
3083 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); | 3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); |
3084 gcall *call_stmt = gimple_build_call (t, 0); | 3355 gcall *call_stmt = gimple_build_call (t, 0); |
3085 if (gimple_omp_return_lhs (gsi_stmt (gsi))) | |
3086 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); | |
3087 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); | |
3088 if (fd->ordered) | 3356 if (fd->ordered) |
3089 { | 3357 { |
3090 tree arr = counts[fd->ordered]; | 3358 tree arr = counts[fd->ordered]; |
3091 tree clobber = build_constructor (TREE_TYPE (arr), NULL); | 3359 tree clobber = build_clobber (TREE_TYPE (arr)); |
3092 TREE_THIS_VOLATILE (clobber) = 1; | |
3093 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), | 3360 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), |
3094 GSI_SAME_STMT); | 3361 GSI_SAME_STMT); |
3095 } | 3362 } |
3363 if (gimple_omp_return_lhs (gsi_stmt (gsi))) | |
3364 { | |
3365 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); | |
3366 if (fd->have_reductemp) | |
3367 { | |
3368 gimple *g = gimple_build_assign (reductions, NOP_EXPR, | |
3369 gimple_call_lhs (call_stmt)); | |
3370 gsi_insert_after (&gsi, g, GSI_SAME_STMT); | |
3371 } | |
3372 } | |
3373 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); | |
3096 gsi_remove (&gsi, true); | 3374 gsi_remove (&gsi, true); |
3097 | 3375 |
3098 /* Connect the new blocks. */ | 3376 /* Connect the new blocks. */ |
3099 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; | 3377 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; |
3100 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; | 3378 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; |
3152 original loop, and move the loop entry value of the inner phi to | 3430 original loop, and move the loop entry value of the inner phi to |
3153 the loop entry value of the outer phi. */ | 3431 the loop entry value of the outer phi. */ |
3154 gphi_iterator psi; | 3432 gphi_iterator psi; |
3155 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) | 3433 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) |
3156 { | 3434 { |
3157 source_location locus; | 3435 location_t locus; |
3158 gphi *nphi; | 3436 gphi *nphi; |
3159 gphi *exit_phi = psi.phi (); | 3437 gphi *exit_phi = psi.phi (); |
3160 | 3438 |
3161 if (virtual_operand_p (gimple_phi_result (exit_phi))) | 3439 if (virtual_operand_p (gimple_phi_result (exit_phi))) |
3162 continue; | 3440 continue; |
3196 recompute_dominator (CDI_DOMINATORS, l1_bb)); | 3474 recompute_dominator (CDI_DOMINATORS, l1_bb)); |
3197 | 3475 |
3198 /* We enter expand_omp_for_generic with a loop. This original loop may | 3476 /* We enter expand_omp_for_generic with a loop. This original loop may |
3199 have its own loop struct, or it may be part of an outer loop struct | 3477 have its own loop struct, or it may be part of an outer loop struct |
3200 (which may be the fake loop). */ | 3478 (which may be the fake loop). */ |
3201 struct loop *outer_loop = entry_bb->loop_father; | 3479 class loop *outer_loop = entry_bb->loop_father; |
3202 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; | 3480 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; |
3203 | 3481 |
3204 add_bb_to_loop (l2_bb, outer_loop); | 3482 add_bb_to_loop (l2_bb, outer_loop); |
3205 | 3483 |
3206 /* We've added a new loop around the original loop. Allocate the | 3484 /* We've added a new loop around the original loop. Allocate the |
3207 corresponding loop struct. */ | 3485 corresponding loop struct. */ |
3208 struct loop *new_loop = alloc_loop (); | 3486 class loop *new_loop = alloc_loop (); |
3209 new_loop->header = l0_bb; | 3487 new_loop->header = l0_bb; |
3210 new_loop->latch = l2_bb; | 3488 new_loop->latch = l2_bb; |
3211 add_loop (new_loop, outer_loop); | 3489 add_loop (new_loop, outer_loop); |
3212 | 3490 |
3213 /* Allocate a loop structure for the original loop unless we already | 3491 /* Allocate a loop structure for the original loop unless we already |
3214 had one. */ | 3492 had one. */ |
3215 if (!orig_loop_has_loop_struct | 3493 if (!orig_loop_has_loop_struct |
3216 && !gimple_omp_for_combined_p (fd->for_stmt)) | 3494 && !gimple_omp_for_combined_p (fd->for_stmt)) |
3217 { | 3495 { |
3218 struct loop *orig_loop = alloc_loop (); | 3496 class loop *orig_loop = alloc_loop (); |
3219 orig_loop->header = l1_bb; | 3497 orig_loop->header = l1_bb; |
3220 /* The loop may have multiple latches. */ | 3498 /* The loop may have multiple latches. */ |
3221 add_loop (orig_loop, new_loop); | 3499 add_loop (orig_loop, new_loop); |
3222 } | 3500 } |
3223 } | 3501 } |
3502 } | |
3503 | |
3504 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, | |
3505 compute needed allocation size. If !ALLOC, handle team allocations; |
3506 if ALLOC, thread allocations. SZ is the initial needed size for |
3507 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, | |
3508 CNT number of elements of each array, for !ALLOC this is | |
3509 omp_get_num_threads (), for ALLOC number of iterations handled by the | |
3510 current thread. If PTR is non-NULL, it is the start of the allocation | |
3511 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ | |
3512 clauses pointers to the corresponding arrays. */ | |
3513 | |
3514 static tree | |
3515 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz, | |
3516 unsigned HOST_WIDE_INT alloc_align, tree cnt, | |
3517 gimple_stmt_iterator *gsi, bool alloc) | |
3518 { | |
3519 tree eltsz = NULL_TREE; | |
3520 unsigned HOST_WIDE_INT preval = 0; | |
3521 if (ptr && sz) | |
3522 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), | |
3523 ptr, size_int (sz)); | |
3524 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
3525 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ | |
3526 && !OMP_CLAUSE__SCANTEMP__CONTROL (c) | |
3527 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc) | |
3528 { | |
3529 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); | |
3530 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type); | |
3531 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) | |
3532 { | |
3533 unsigned HOST_WIDE_INT szl | |
3534 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type)); | |
3535 szl = least_bit_hwi (szl); | |
3536 if (szl) | |
3537 al = MIN (al, szl); | |
3538 } | |
3539 if (ptr == NULL_TREE) | |
3540 { | |
3541 if (eltsz == NULL_TREE) | |
3542 eltsz = TYPE_SIZE_UNIT (pointee_type); | |
3543 else | |
3544 eltsz = size_binop (PLUS_EXPR, eltsz, | |
3545 TYPE_SIZE_UNIT (pointee_type)); | |
3546 } | |
3547 if (preval == 0 && al <= alloc_align) | |
3548 { | |
3549 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz; | |
3550 sz += diff; | |
3551 if (diff && ptr) | |
3552 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), | |
3553 ptr, size_int (diff)); | |
3554 } | |
3555 else if (al > preval) | |
3556 { | |
3557 if (ptr) | |
3558 { | |
3559 ptr = fold_convert (pointer_sized_int_node, ptr); | |
3560 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr, | |
3561 build_int_cst (pointer_sized_int_node, | |
3562 al - 1)); | |
3563 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr, | |
3564 build_int_cst (pointer_sized_int_node, | |
3565 -(HOST_WIDE_INT) al)); | |
3566 ptr = fold_convert (ptr_type_node, ptr); | |
3567 } | |
3568 else | |
3569 sz += al - 1; | |
3570 } | |
3571 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) | |
3572 preval = al; | |
3573 else | |
3574 preval = 1; | |
3575 if (ptr) | |
3576 { | |
3577 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false); | |
3578 ptr = OMP_CLAUSE_DECL (c); | |
3579 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, | |
3580 size_binop (MULT_EXPR, cnt, | |
3581 TYPE_SIZE_UNIT (pointee_type))); | |
3582 } | |
3583 } | |
3584 | |
3585 if (ptr == NULL_TREE) | |
3586 { | |
3587 eltsz = size_binop (MULT_EXPR, eltsz, cnt); | |
3588 if (sz) | |
3589 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz)); | |
3590 return eltsz; | |
3591 } | |
3592 else | |
3593 return ptr; | |
3224 } | 3594 } |
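
expand_omp_scantemp_alloc exists for OpenMP 5.0 scan reductions: the lowered loop needs one temporary array per _scantemp_ clause, and this helper either totals the bytes required or carves an existing allocation into suitably aligned arrays. The construct that gives rise to such clauses (illustrative user code):

    int inclusive_scan (int *a, int *b, int n)
    {
      int sum = 0;
    #pragma omp parallel for reduction(inscan, +: sum)
      for (int i = 0; i < n; i++)
        {
          sum += a[i];
    #pragma omp scan inclusive(sum)
          b[i] = sum;
        }
      return sum;
    }
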
3225 | 3595 |
3226 /* A subroutine of expand_omp_for. Generate code for a parallel | 3596 /* A subroutine of expand_omp_for. Generate code for a parallel |
3227 loop with static schedule and no specified chunk size. Given | 3597 loop with static schedule and no specified chunk size. Given |
3228 parameters: | 3598 parameters: |
3263 static void | 3633 static void |
3264 expand_omp_for_static_nochunk (struct omp_region *region, | 3634 expand_omp_for_static_nochunk (struct omp_region *region, |
3265 struct omp_for_data *fd, | 3635 struct omp_for_data *fd, |
3266 gimple *inner_stmt) | 3636 gimple *inner_stmt) |
3267 { | 3637 { |
3268 tree n, q, s0, e0, e, t, tt, nthreads, threadid; | 3638 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid; |
3269 tree type, itype, vmain, vback; | 3639 tree type, itype, vmain, vback; |
3270 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; | 3640 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; |
3271 basic_block body_bb, cont_bb, collapse_bb = NULL; | 3641 basic_block body_bb, cont_bb, collapse_bb = NULL; |
3272 basic_block fin_bb; | 3642 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL; |
3273 gimple_stmt_iterator gsi; | 3643 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL; |
3644 gimple_stmt_iterator gsi, gsip; | |
3274 edge ep; | 3645 edge ep; |
3275 bool broken_loop = region->cont == NULL; | 3646 bool broken_loop = region->cont == NULL; |
3276 tree *counts = NULL; | 3647 tree *counts = NULL; |
3277 tree n1, n2, step; | 3648 tree n1, n2, step; |
3649 tree reductions = NULL_TREE; | |
3650 tree cond_var = NULL_TREE, condtemp = NULL_TREE; | |
3278 | 3651 |
3279 itype = type = TREE_TYPE (fd->loop.v); | 3652 itype = type = TREE_TYPE (fd->loop.v); |
3280 if (POINTER_TYPE_P (type)) | 3653 if (POINTER_TYPE_P (type)) |
3281 itype = signed_type_for (type); | 3654 itype = signed_type_for (type); |
3282 | 3655 |
3297 exit_bb = region->exit; | 3670 exit_bb = region->exit; |
3298 | 3671 |
3299 /* Iteration space partitioning goes in ENTRY_BB. */ | 3672 /* Iteration space partitioning goes in ENTRY_BB. */ |
3300 gsi = gsi_last_nondebug_bb (entry_bb); | 3673 gsi = gsi_last_nondebug_bb (entry_bb); |
3301 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); | 3674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); |
3675 gsip = gsi; | |
3676 gsi_prev (&gsip); | |
3302 | 3677 |
3303 if (fd->collapse > 1) | 3678 if (fd->collapse > 1) |
3304 { | 3679 { |
3305 int first_zero_iter = -1, dummy = -1; | 3680 int first_zero_iter = -1, dummy = -1; |
3306 basic_block l2_dom_bb = NULL, dummy_bb = NULL; | 3681 basic_block l2_dom_bb = NULL, dummy_bb = NULL; |
3326 true, GSI_SAME_STMT); | 3701 true, GSI_SAME_STMT); |
3327 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); | 3702 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); |
3328 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, | 3703 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, |
3329 true, GSI_SAME_STMT); | 3704 true, GSI_SAME_STMT); |
3330 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, | 3705 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, |
3331 NULL_TREE, NULL_TREE); | 3706 NULL_TREE, NULL_TREE); |
3332 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); | 3707 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); |
3333 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), | 3708 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), |
3334 expand_omp_regimplify_p, NULL, NULL) | 3709 expand_omp_regimplify_p, NULL, NULL) |
3335 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), | 3710 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), |
3336 expand_omp_regimplify_p, NULL, NULL)) | 3711 expand_omp_regimplify_p, NULL, NULL)) |
3356 } | 3731 } |
3357 } | 3732 } |
3358 gsi = gsi_last_bb (entry_bb); | 3733 gsi = gsi_last_bb (entry_bb); |
3359 } | 3734 } |
3360 | 3735 |
3736 if (fd->lastprivate_conditional) | |
3737 { | |
3738 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
3739 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); | |
3740 if (fd->have_pointer_condtemp) | |
3741 condtemp = OMP_CLAUSE_DECL (c); | |
3742 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); | |
3743 cond_var = OMP_CLAUSE_DECL (c); | |
3744 } | |
3745 if (fd->have_reductemp | |
3746 /* For scan, we don't want to reinitialize condtemp before the | |
3747 second loop. */ | |
3748 || (fd->have_pointer_condtemp && !fd->have_scantemp) | |
3749 || fd->have_nonctrl_scantemp) | |
3750 { | |
3751 tree t1 = build_int_cst (long_integer_type_node, 0); | |
3752 tree t2 = build_int_cst (long_integer_type_node, 1); | |
3753 tree t3 = build_int_cstu (long_integer_type_node, | |
3754 (HOST_WIDE_INT_1U << 31) + 1); | |
3755 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
3756 gimple_stmt_iterator gsi2 = gsi_none (); | |
3757 gimple *g = NULL; | |
3758 tree mem = null_pointer_node, memv = NULL_TREE; | |
3759 unsigned HOST_WIDE_INT condtemp_sz = 0; | |
3760 unsigned HOST_WIDE_INT alloc_align = 0; | |
3761 if (fd->have_reductemp) | |
3762 { | |
3763 gcc_assert (!fd->have_nonctrl_scantemp); | |
3764 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); | |
3765 reductions = OMP_CLAUSE_DECL (c); | |
3766 gcc_assert (TREE_CODE (reductions) == SSA_NAME); | |
3767 g = SSA_NAME_DEF_STMT (reductions); | |
3768 reductions = gimple_assign_rhs1 (g); | |
3769 OMP_CLAUSE_DECL (c) = reductions; | |
3770 gsi2 = gsi_for_stmt (g); | |
3771 } | |
3772 else | |
3773 { | |
3774 if (gsi_end_p (gsip)) | |
3775 gsi2 = gsi_after_labels (region->entry); | |
3776 else | |
3777 gsi2 = gsip; | |
3778 reductions = null_pointer_node; | |
3779 } | |
3780 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) | |
3781 { | |
3782 tree type; | |
3783 if (fd->have_pointer_condtemp) | |
3784 type = TREE_TYPE (condtemp); | |
3785 else | |
3786 type = ptr_type_node; | |
3787 memv = create_tmp_var (type); | |
3788 TREE_ADDRESSABLE (memv) = 1; | |
3789 unsigned HOST_WIDE_INT sz = 0; | |
3790 tree size = NULL_TREE; | |
3791 if (fd->have_pointer_condtemp) | |
3792 { | |
3793 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); | |
3794 sz *= fd->lastprivate_conditional; | |
3795 condtemp_sz = sz; | |
3796 } | |
3797 if (fd->have_nonctrl_scantemp) | |
3798 { | |
3799 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); | |
3800 gimple *g = gimple_build_call (nthreads, 0); | |
3801 nthreads = create_tmp_var (integer_type_node); | |
3802 gimple_call_set_lhs (g, nthreads); | |
3803 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); | |
3804 nthreads = fold_convert (sizetype, nthreads); | |
3805 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); | |
3806 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, | |
3807 alloc_align, nthreads, NULL, | |
3808 false); | |
3809 size = fold_convert (type, size); | |
3810 } | |
3811 else | |
3812 size = build_int_cst (type, sz); | |
3813 expand_omp_build_assign (&gsi2, memv, size, false); | |
3814 mem = build_fold_addr_expr (memv); | |
3815 } | |
3816 tree t | |
3817 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), | |
3818 9, t1, t2, t2, t3, t1, null_pointer_node, | |
3819 null_pointer_node, reductions, mem); | |
3820 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, | |
3821 true, GSI_SAME_STMT); | |
3822 if (fd->have_pointer_condtemp) | |
3823 expand_omp_build_assign (&gsi2, condtemp, memv, false); | |
3824 if (fd->have_nonctrl_scantemp) | |
3825 { | |
3826 tree ptr = fd->have_pointer_condtemp ? condtemp : memv; | |
3827 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, | |
3828 alloc_align, nthreads, &gsi2, false); | |
3829 } | |
3830 if (fd->have_reductemp) | |
3831 { | |
3832 gsi_remove (&gsi2, true); | |
3833 release_ssa_name (gimple_assign_lhs (g)); | |
3834 } | |
3835 } | |
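When task reductions, a conditional-lastprivate buffer or scan temporaries are present, the static-schedule expansion above emits a single GOMP_loop_start call whose last two arguments carry the reduction array and the address of the scratch buffer. A minimal sketch of user code that reaches the scan-temporary side of this path, assuming OpenMP 5.0 inscan support (prefix_sums, a, b and n are illustrative names, not anything from this file):

    void
    prefix_sums (int n, const int *a, int *b)
    {
      int sum = 0;
      #pragma omp parallel for reduction(inscan, +: sum)
      for (int i = 0; i < n; i++)
        {
          sum += a[i];                  /* input phase */
          #pragma omp scan inclusive(sum)
          b[i] = sum;                   /* scan phase: running inclusive total */
        }
    }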
3361 switch (gimple_omp_for_kind (fd->for_stmt)) | 3836 switch (gimple_omp_for_kind (fd->for_stmt)) |
3362 { | 3837 { |
3363 case GF_OMP_FOR_KIND_FOR: | 3838 case GF_OMP_FOR_KIND_FOR: |
3364 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); | 3839 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); |
3365 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); | 3840 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); |
3439 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); | 3914 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); |
3440 | 3915 |
3441 third_bb = split_block (second_bb, assign_stmt)->dest; | 3916 third_bb = split_block (second_bb, assign_stmt)->dest; |
3442 gsi = gsi_last_nondebug_bb (third_bb); | 3917 gsi = gsi_last_nondebug_bb (third_bb); |
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); | 3918 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); |
3919 | |
3920 if (fd->have_nonctrl_scantemp) | |
3921 { | |
3922 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
3923 tree controlp = NULL_TREE, controlb = NULL_TREE; | |
3924 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
3925 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ | |
3926 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) | |
3927 { | |
3928 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) | |
3929 controlb = OMP_CLAUSE_DECL (c); | |
3930 else | |
3931 controlp = OMP_CLAUSE_DECL (c); | |
3932 if (controlb && controlp) | |
3933 break; | |
3934 } | |
3935 gcc_assert (controlp && controlb); | |
3936 tree cnt = create_tmp_var (sizetype); | |
3937 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); | |
3938 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3939 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); | |
3940 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, | |
3941 alloc_align, cnt, NULL, true); | |
3942 tree size = create_tmp_var (sizetype); | |
3943 expand_omp_build_assign (&gsi, size, sz, false); | |
3944 tree cmp = fold_build2 (GT_EXPR, boolean_type_node, | |
3945 size, size_int (16384)); | |
3946 expand_omp_build_assign (&gsi, controlb, cmp); | |
3947 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, | |
3948 NULL_TREE, NULL_TREE); | |
3949 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3950 fourth_bb = split_block (third_bb, g)->dest; | |
3951 gsi = gsi_last_nondebug_bb (fourth_bb); | |
3952 /* FIXME: Once we have allocators, this should use the allocator. */ | |
3953 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size); | |
3954 gimple_call_set_lhs (g, controlp); | |
3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3956 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, | |
3957 &gsi, true); | |
3958 gsi_prev (&gsi); | |
3959 g = gsi_stmt (gsi); | |
3960 fifth_bb = split_block (fourth_bb, g)->dest; | |
3961 gsi = gsi_last_nondebug_bb (fifth_bb); | |
3962 | |
3963 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); | |
3964 gimple_call_set_lhs (g, controlp); | |
3965 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3966 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); | |
3967 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
3968 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ | |
3969 && OMP_CLAUSE__SCANTEMP__ALLOC (c)) | |
3970 { | |
3971 tree tmp = create_tmp_var (sizetype); | |
3972 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); | |
3973 g = gimple_build_assign (tmp, MULT_EXPR, cnt, | |
3974 TYPE_SIZE_UNIT (pointee_type)); | |
3975 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3976 g = gimple_build_call (alloca_decl, 2, tmp, | |
3977 size_int (TYPE_ALIGN (pointee_type))); | |
3978 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); | |
3979 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
3980 } | |
3981 | |
3982 sixth_bb = split_block (fifth_bb, g)->dest; | |
3983 gsi = gsi_last_nondebug_bb (sixth_bb); | |
3984 } | |
3444 | 3985 |
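The control path built just above chooses where the per-thread scan buffers live: a combined size over 16384 bytes goes through malloc (released again in the exit-block code further down), anything smaller saves the stack pointer and gets one aligned alloca per _SCANTEMP_ clause. A simplified mirror of just that size policy, with illustrative names (the real decision is the comparison and cond built above):

    #include <stdbool.h>
    #include <stddef.h>

    /* True when the scan buffers should be heap-allocated; 16384 is the
       same threshold used in the comparison above.  */
    static bool
    scan_buffers_need_heap (size_t iters_per_thread, size_t bytes_per_iter)
    {
      return iters_per_thread * bytes_per_iter > 16384;
    }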
3445 t = build2 (MULT_EXPR, itype, q, threadid); | 3986 t = build2 (MULT_EXPR, itype, q, threadid); |
3446 t = build2 (PLUS_EXPR, itype, t, tt); | 3987 t = build2 (PLUS_EXPR, itype, t, tt); |
3447 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); | 3988 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); |
3448 | 3989 |
3514 DECL_P (startvar) | 4055 DECL_P (startvar) |
3515 && TREE_ADDRESSABLE (startvar), | 4056 && TREE_ADDRESSABLE (startvar), |
3516 NULL_TREE, false, GSI_CONTINUE_LINKING); | 4057 NULL_TREE, false, GSI_CONTINUE_LINKING); |
3517 assign_stmt = gimple_build_assign (startvar, t); | 4058 assign_stmt = gimple_build_assign (startvar, t); |
3518 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | 4059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); |
4060 if (cond_var) | |
4061 { | |
4062 tree itype = TREE_TYPE (cond_var); | |
4063 /* For the lastprivate(conditional:) itervar, we need some iteration | |
4064 counter that starts at a non-zero unsigned value and increases. | |
4065 Prefer as few IVs as possible, so if we can use startvar | |
4066 itself, use that, or startvar + constant (those would be | |
4067 incremented with step), and as a last resort use s0 + 1, | |
4068 itself incremented by 1. */ | |
4069 if (POINTER_TYPE_P (type) | |
4070 || TREE_CODE (n1) != INTEGER_CST | |
4071 || fd->loop.cond_code != LT_EXPR) | |
4072 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), | |
4073 build_int_cst (itype, 1)); | |
4074 else if (tree_int_cst_sgn (n1) == 1) | |
4075 t = fold_convert (itype, t); | |
4076 else | |
4077 { | |
4078 tree c = fold_convert (itype, n1); | |
4079 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); | |
4080 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); | |
4081 } | |
4082 t = force_gimple_operand_gsi (&gsi, t, false, | |
4083 NULL_TREE, false, GSI_CONTINUE_LINKING); | |
4084 assign_stmt = gimple_build_assign (cond_var, t); | |
4085 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | |
4086 } | |
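The cond_var initialized here is the monotonically increasing counter that the lastprivate(conditional:) machinery compares to decide which conditional assignment counts as last; roughly, every executed assignment records the current counter value and the largest recorded value wins. Illustrative user code for this path, assuming OpenMP 5.0 (last_positive, a, n and out are made-up names):

    void
    last_positive (int n, const int *a, int *out)
    {
      int last = -1;
      #pragma omp parallel for schedule(static) lastprivate(conditional: last)
      for (int i = 0; i < n; i++)
        if (a[i] > 0)
          last = a[i];  /* only the assignment from the highest iteration
                           is copied back to the original variable */
      *out = last;
    }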
3519 | 4087 |
3520 t = fold_convert (itype, e0); | 4088 t = fold_convert (itype, e0); |
3521 t = fold_build2 (MULT_EXPR, itype, t, step); | 4089 t = fold_build2 (MULT_EXPR, itype, t, step); |
3522 if (POINTER_TYPE_P (type)) | 4090 if (POINTER_TYPE_P (type)) |
3523 { | 4091 { |
3595 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); | 4163 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); |
3596 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); | 4164 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); |
3597 vmain = gimple_omp_continue_control_use (cont_stmt); | 4165 vmain = gimple_omp_continue_control_use (cont_stmt); |
3598 vback = gimple_omp_continue_control_def (cont_stmt); | 4166 vback = gimple_omp_continue_control_def (cont_stmt); |
3599 | 4167 |
4168 if (cond_var) | |
4169 { | |
4170 tree itype = TREE_TYPE (cond_var); | |
4171 tree t2; | |
4172 if (POINTER_TYPE_P (type) | |
4173 || TREE_CODE (n1) != INTEGER_CST | |
4174 || fd->loop.cond_code != LT_EXPR) | |
4175 t2 = build_int_cst (itype, 1); | |
4176 else | |
4177 t2 = fold_convert (itype, step); | |
4178 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); | |
4179 t2 = force_gimple_operand_gsi (&gsi, t2, false, | |
4180 NULL_TREE, true, GSI_SAME_STMT); | |
4181 assign_stmt = gimple_build_assign (cond_var, t2); | |
4182 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); | |
4183 } | |
4184 | |
3600 if (!gimple_omp_for_combined_p (fd->for_stmt)) | 4185 if (!gimple_omp_for_combined_p (fd->for_stmt)) |
3601 { | 4186 { |
3602 if (POINTER_TYPE_P (type)) | 4187 if (POINTER_TYPE_P (type)) |
3603 t = fold_build_pointer_plus (vmain, step); | 4188 t = fold_build_pointer_plus (vmain, step); |
3604 else | 4189 else |
3626 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ | 4211 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ |
3627 gsi = gsi_last_nondebug_bb (exit_bb); | 4212 gsi = gsi_last_nondebug_bb (exit_bb); |
3628 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) | 4213 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) |
3629 { | 4214 { |
3630 t = gimple_omp_return_lhs (gsi_stmt (gsi)); | 4215 t = gimple_omp_return_lhs (gsi_stmt (gsi)); |
3631 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); | 4216 if (fd->have_reductemp |
4217 || ((fd->have_pointer_condtemp || fd->have_scantemp) | |
4218 && !fd->have_nonctrl_scantemp)) | |
4219 { | |
4220 tree fn; | |
4221 if (t) | |
4222 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); | |
4223 else | |
4224 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); | |
4225 gcall *g = gimple_build_call (fn, 0); | |
4226 if (t) | |
4227 { | |
4228 gimple_call_set_lhs (g, t); | |
4229 if (fd->have_reductemp) | |
4230 gsi_insert_after (&gsi, gimple_build_assign (reductions, | |
4231 NOP_EXPR, t), | |
4232 GSI_SAME_STMT); | |
4233 } | |
4234 gsi_insert_after (&gsi, g, GSI_SAME_STMT); | |
4235 } | |
4236 else | |
4237 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); | |
4238 } | |
4239 else if ((fd->have_pointer_condtemp || fd->have_scantemp) | |
4240 && !fd->have_nonctrl_scantemp) | |
4241 { | |
4242 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); | |
4243 gcall *g = gimple_build_call (fn, 0); | |
4244 gsi_insert_after (&gsi, g, GSI_SAME_STMT); | |
4245 } | |
4246 if (fd->have_scantemp && !fd->have_nonctrl_scantemp) | |
4247 { | |
4248 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
4249 tree controlp = NULL_TREE, controlb = NULL_TREE; | |
4250 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
4251 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ | |
4252 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) | |
4253 { | |
4254 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) | |
4255 controlb = OMP_CLAUSE_DECL (c); | |
4256 else | |
4257 controlp = OMP_CLAUSE_DECL (c); | |
4258 if (controlb && controlp) | |
4259 break; | |
4260 } | |
4261 gcc_assert (controlp && controlb); | |
4262 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, | |
4263 NULL_TREE, NULL_TREE); | |
4264 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
4265 exit1_bb = split_block (exit_bb, g)->dest; | |
4266 gsi = gsi_after_labels (exit1_bb); | |
4267 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, | |
4268 controlp); | |
4269 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
4270 exit2_bb = split_block (exit1_bb, g)->dest; | |
4271 gsi = gsi_after_labels (exit2_bb); | |
4272 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1, | |
4273 controlp); | |
4274 gsi_insert_before (&gsi, g, GSI_SAME_STMT); | |
4275 exit3_bb = split_block (exit2_bb, g)->dest; | |
4276 gsi = gsi_after_labels (exit3_bb); | |
3632 } | 4277 } |
3633 gsi_remove (&gsi, true); | 4278 gsi_remove (&gsi, true); |
3634 | 4279 |
3635 /* Connect all the blocks. */ | 4280 /* Connect all the blocks. */ |
3636 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); | 4281 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); |
3637 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4); | 4282 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4); |
3638 ep = find_edge (entry_bb, second_bb); | 4283 ep = find_edge (entry_bb, second_bb); |
3639 ep->flags = EDGE_TRUE_VALUE; | 4284 ep->flags = EDGE_TRUE_VALUE; |
3640 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); | 4285 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); |
3641 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; | 4286 if (fourth_bb) |
3642 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; | 4287 { |
4288 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); | |
4289 ep->probability | |
4290 = profile_probability::guessed_always ().apply_scale (1, 2); | |
4291 ep = find_edge (third_bb, fourth_bb); | |
4292 ep->flags = EDGE_TRUE_VALUE; | |
4293 ep->probability | |
4294 = profile_probability::guessed_always ().apply_scale (1, 2); | |
4295 ep = find_edge (fourth_bb, fifth_bb); | |
4296 redirect_edge_and_branch (ep, sixth_bb); | |
4297 } | |
4298 else | |
4299 sixth_bb = third_bb; | |
4300 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; | |
4301 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; | |
4302 if (exit1_bb) | |
4303 { | |
4304 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); | |
4305 ep->probability | |
4306 = profile_probability::guessed_always ().apply_scale (1, 2); | |
4307 ep = find_edge (exit_bb, exit1_bb); | |
4308 ep->flags = EDGE_TRUE_VALUE; | |
4309 ep->probability | |
4310 = profile_probability::guessed_always ().apply_scale (1, 2); | |
4311 ep = find_edge (exit1_bb, exit2_bb); | |
4312 redirect_edge_and_branch (ep, exit3_bb); | |
4313 } | |
3643 | 4314 |
3644 if (!broken_loop) | 4315 if (!broken_loop) |
3645 { | 4316 { |
3646 ep = find_edge (cont_bb, body_bb); | 4317 ep = find_edge (cont_bb, body_bb); |
3647 if (ep == NULL) | 4318 if (ep == NULL) |
3665 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; | 4336 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; |
3666 } | 4337 } |
3667 | 4338 |
3668 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); | 4339 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); |
3669 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); | 4340 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); |
3670 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); | 4341 if (fourth_bb) |
4342 { | |
4343 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); | |
4344 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); | |
4345 } | |
4346 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); | |
3671 | 4347 |
3672 set_immediate_dominator (CDI_DOMINATORS, body_bb, | 4348 set_immediate_dominator (CDI_DOMINATORS, body_bb, |
3673 recompute_dominator (CDI_DOMINATORS, body_bb)); | 4349 recompute_dominator (CDI_DOMINATORS, body_bb)); |
3674 set_immediate_dominator (CDI_DOMINATORS, fin_bb, | 4350 set_immediate_dominator (CDI_DOMINATORS, fin_bb, |
3675 recompute_dominator (CDI_DOMINATORS, fin_bb)); | 4351 recompute_dominator (CDI_DOMINATORS, fin_bb)); |
3676 | 4352 if (exit1_bb) |
3677 struct loop *loop = body_bb->loop_father; | 4353 { |
4354 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); | |
4355 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); | |
4356 } | |
4357 | |
4358 class loop *loop = body_bb->loop_father; | |
3678 if (loop != entry_bb->loop_father) | 4359 if (loop != entry_bb->loop_father) |
3679 { | 4360 { |
3680 gcc_assert (broken_loop || loop->header == body_bb); | 4361 gcc_assert (broken_loop || loop->header == body_bb); |
3681 gcc_assert (broken_loop | 4362 gcc_assert (broken_loop |
3682 || loop->latch == region->cont | 4363 || loop->latch == region->cont |
3758 tree n, s0, e0, e, t; | 4439 tree n, s0, e0, e, t; |
3759 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; | 4440 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; |
3760 tree type, itype, vmain, vback, vextra; | 4441 tree type, itype, vmain, vback, vextra; |
3761 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; | 4442 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; |
3762 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; | 4443 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; |
3763 gimple_stmt_iterator gsi; | 4444 gimple_stmt_iterator gsi, gsip; |
3764 edge se; | 4445 edge se; |
3765 bool broken_loop = region->cont == NULL; | 4446 bool broken_loop = region->cont == NULL; |
3766 tree *counts = NULL; | 4447 tree *counts = NULL; |
3767 tree n1, n2, step; | 4448 tree n1, n2, step; |
4449 tree reductions = NULL_TREE; | |
4450 tree cond_var = NULL_TREE, condtemp = NULL_TREE; | |
3768 | 4451 |
3769 itype = type = TREE_TYPE (fd->loop.v); | 4452 itype = type = TREE_TYPE (fd->loop.v); |
3770 if (POINTER_TYPE_P (type)) | 4453 if (POINTER_TYPE_P (type)) |
3771 itype = signed_type_for (type); | 4454 itype = signed_type_for (type); |
3772 | 4455 |
3791 exit_bb = region->exit; | 4474 exit_bb = region->exit; |
3792 | 4475 |
3793 /* Trip and adjustment setup goes in ENTRY_BB. */ | 4476 /* Trip and adjustment setup goes in ENTRY_BB. */ |
3794 gsi = gsi_last_nondebug_bb (entry_bb); | 4477 gsi = gsi_last_nondebug_bb (entry_bb); |
3795 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); | 4478 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); |
4479 gsip = gsi; | |
4480 gsi_prev (&gsip); | |
3796 | 4481 |
3797 if (fd->collapse > 1) | 4482 if (fd->collapse > 1) |
3798 { | 4483 { |
3799 int first_zero_iter = -1, dummy = -1; | 4484 int first_zero_iter = -1, dummy = -1; |
3800 basic_block l2_dom_bb = NULL, dummy_bb = NULL; | 4485 basic_block l2_dom_bb = NULL, dummy_bb = NULL; |
3850 } | 4535 } |
3851 } | 4536 } |
3852 gsi = gsi_last_bb (entry_bb); | 4537 gsi = gsi_last_bb (entry_bb); |
3853 } | 4538 } |
3854 | 4539 |
4540 if (fd->lastprivate_conditional) | |
4541 { | |
4542 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
4543 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); | |
4544 if (fd->have_pointer_condtemp) | |
4545 condtemp = OMP_CLAUSE_DECL (c); | |
4546 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); | |
4547 cond_var = OMP_CLAUSE_DECL (c); | |
4548 } | |
4549 if (fd->have_reductemp || fd->have_pointer_condtemp) | |
4550 { | |
4551 tree t1 = build_int_cst (long_integer_type_node, 0); | |
4552 tree t2 = build_int_cst (long_integer_type_node, 1); | |
4553 tree t3 = build_int_cstu (long_integer_type_node, | |
4554 (HOST_WIDE_INT_1U << 31) + 1); | |
4555 tree clauses = gimple_omp_for_clauses (fd->for_stmt); | |
4556 gimple_stmt_iterator gsi2 = gsi_none (); | |
4557 gimple *g = NULL; | |
4558 tree mem = null_pointer_node, memv = NULL_TREE; | |
4559 if (fd->have_reductemp) | |
4560 { | |
4561 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); | |
4562 reductions = OMP_CLAUSE_DECL (c); | |
4563 gcc_assert (TREE_CODE (reductions) == SSA_NAME); | |
4564 g = SSA_NAME_DEF_STMT (reductions); | |
4565 reductions = gimple_assign_rhs1 (g); | |
4566 OMP_CLAUSE_DECL (c) = reductions; | |
4567 gsi2 = gsi_for_stmt (g); | |
4568 } | |
4569 else | |
4570 { | |
4571 if (gsi_end_p (gsip)) | |
4572 gsi2 = gsi_after_labels (region->entry); | |
4573 else | |
4574 gsi2 = gsip; | |
4575 reductions = null_pointer_node; | |
4576 } | |
4577 if (fd->have_pointer_condtemp) | |
4578 { | |
4579 tree type = TREE_TYPE (condtemp); | |
4580 memv = create_tmp_var (type); | |
4581 TREE_ADDRESSABLE (memv) = 1; | |
4582 unsigned HOST_WIDE_INT sz | |
4583 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); | |
4584 sz *= fd->lastprivate_conditional; | |
4585 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), | |
4586 false); | |
4587 mem = build_fold_addr_expr (memv); | |
4588 } | |
4589 tree t | |
4590 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), | |
4591 9, t1, t2, t2, t3, t1, null_pointer_node, | |
4592 null_pointer_node, reductions, mem); | |
4593 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, | |
4594 true, GSI_SAME_STMT); | |
4595 if (fd->have_pointer_condtemp) | |
4596 expand_omp_build_assign (&gsi2, condtemp, memv, false); | |
4597 if (fd->have_reductemp) | |
4598 { | |
4599 gsi_remove (&gsi2, true); | |
4600 release_ssa_name (gimple_assign_lhs (g)); | |
4601 } | |
4602 } | |
3855 switch (gimple_omp_for_kind (fd->for_stmt)) | 4603 switch (gimple_omp_for_kind (fd->for_stmt)) |
3856 { | 4604 { |
3857 case GF_OMP_FOR_KIND_FOR: | 4605 case GF_OMP_FOR_KIND_FOR: |
3858 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); | 4606 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); |
3859 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); | 4607 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); |
4023 DECL_P (startvar) | 4771 DECL_P (startvar) |
4024 && TREE_ADDRESSABLE (startvar), | 4772 && TREE_ADDRESSABLE (startvar), |
4025 NULL_TREE, false, GSI_CONTINUE_LINKING); | 4773 NULL_TREE, false, GSI_CONTINUE_LINKING); |
4026 assign_stmt = gimple_build_assign (startvar, t); | 4774 assign_stmt = gimple_build_assign (startvar, t); |
4027 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | 4775 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); |
4776 if (cond_var) | |
4777 { | |
4778 tree itype = TREE_TYPE (cond_var); | |
4779 /* For the lastprivate(conditional:) itervar, we need some iteration | |
4780 counter that starts at a non-zero unsigned value and increases. | |
4781 Prefer as few IVs as possible, so if we can use startvar | |
4782 itself, use that, or startvar + constant (those would be | |
4783 incremented with step), and as a last resort use s0 + 1, | |
4784 itself incremented by 1. */ | |
4785 if (POINTER_TYPE_P (type) | |
4786 || TREE_CODE (n1) != INTEGER_CST | |
4787 || fd->loop.cond_code != LT_EXPR) | |
4788 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), | |
4789 build_int_cst (itype, 1)); | |
4790 else if (tree_int_cst_sgn (n1) == 1) | |
4791 t = fold_convert (itype, t); | |
4792 else | |
4793 { | |
4794 tree c = fold_convert (itype, n1); | |
4795 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); | |
4796 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); | |
4797 } | |
4798 t = force_gimple_operand_gsi (&gsi, t, false, | |
4799 NULL_TREE, false, GSI_CONTINUE_LINKING); | |
4800 assign_stmt = gimple_build_assign (cond_var, t); | |
4801 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); | |
4802 } | |
4028 | 4803 |
4029 t = fold_convert (itype, e0); | 4804 t = fold_convert (itype, e0); |
4030 t = fold_build2 (MULT_EXPR, itype, t, step); | 4805 t = fold_build2 (MULT_EXPR, itype, t, step); |
4031 if (POINTER_TYPE_P (type)) | 4806 if (POINTER_TYPE_P (type)) |
4032 { | 4807 { |
4110 gsi = gsi_last_nondebug_bb (cont_bb); | 4885 gsi = gsi_last_nondebug_bb (cont_bb); |
4111 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); | 4886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); |
4112 vmain = gimple_omp_continue_control_use (cont_stmt); | 4887 vmain = gimple_omp_continue_control_use (cont_stmt); |
4113 vback = gimple_omp_continue_control_def (cont_stmt); | 4888 vback = gimple_omp_continue_control_def (cont_stmt); |
4114 | 4889 |
4890 if (cond_var) | |
4891 { | |
4892 tree itype = TREE_TYPE (cond_var); | |
4893 tree t2; | |
4894 if (POINTER_TYPE_P (type) | |
4895 || TREE_CODE (n1) != INTEGER_CST | |
4896 || fd->loop.cond_code != LT_EXPR) | |
4897 t2 = build_int_cst (itype, 1); | |
4898 else | |
4899 t2 = fold_convert (itype, step); | |
4900 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); | |
4901 t2 = force_gimple_operand_gsi (&gsi, t2, false, | |
4902 NULL_TREE, true, GSI_SAME_STMT); | |
4903 assign_stmt = gimple_build_assign (cond_var, t2); | |
4904 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); | |
4905 } | |
4906 | |
4115 if (!gimple_omp_for_combined_p (fd->for_stmt)) | 4907 if (!gimple_omp_for_combined_p (fd->for_stmt)) |
4116 { | 4908 { |
4117 if (POINTER_TYPE_P (type)) | 4909 if (POINTER_TYPE_P (type)) |
4118 t = fold_build_pointer_plus (vmain, step); | 4910 t = fold_build_pointer_plus (vmain, step); |
4119 else | 4911 else |
4153 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ | 4945 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ |
4154 gsi = gsi_last_nondebug_bb (exit_bb); | 4946 gsi = gsi_last_nondebug_bb (exit_bb); |
4155 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) | 4947 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) |
4156 { | 4948 { |
4157 t = gimple_omp_return_lhs (gsi_stmt (gsi)); | 4949 t = gimple_omp_return_lhs (gsi_stmt (gsi)); |
4158 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); | 4950 if (fd->have_reductemp || fd->have_pointer_condtemp) |
4951 { | |
4952 tree fn; | |
4953 if (t) | |
4954 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); | |
4955 else | |
4956 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); | |
4957 gcall *g = gimple_build_call (fn, 0); | |
4958 if (t) | |
4959 { | |
4960 gimple_call_set_lhs (g, t); | |
4961 if (fd->have_reductemp) | |
4962 gsi_insert_after (&gsi, gimple_build_assign (reductions, | |
4963 NOP_EXPR, t), | |
4964 GSI_SAME_STMT); | |
4965 } | |
4966 gsi_insert_after (&gsi, g, GSI_SAME_STMT); | |
4967 } | |
4968 else | |
4969 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); | |
4970 } | |
4971 else if (fd->have_pointer_condtemp) | |
4972 { | |
4973 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); | |
4974 gcall *g = gimple_build_call (fn, 0); | |
4975 gsi_insert_after (&gsi, g, GSI_SAME_STMT); | |
4159 } | 4976 } |
4160 gsi_remove (&gsi, true); | 4977 gsi_remove (&gsi, true); |
4161 | 4978 |
4162 /* Connect the new blocks. */ | 4979 /* Connect the new blocks. */ |
4163 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; | 4980 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; |
4211 psi = gsi_start_phis (fin_bb); | 5028 psi = gsi_start_phis (fin_bb); |
4212 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); | 5029 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); |
4213 gsi_next (&psi), ++i) | 5030 gsi_next (&psi), ++i) |
4214 { | 5031 { |
4215 gphi *nphi; | 5032 gphi *nphi; |
4216 source_location locus; | 5033 location_t locus; |
4217 | 5034 |
4218 phi = psi.phi (); | 5035 phi = psi.phi (); |
4219 if (operand_equal_p (gimple_phi_arg_def (phi, 0), | 5036 if (operand_equal_p (gimple_phi_arg_def (phi, 0), |
4220 redirect_edge_var_map_def (vm), 0)) | 5037 redirect_edge_var_map_def (vm), 0)) |
4221 continue; | 5038 continue; |
4284 set_immediate_dominator (CDI_DOMINATORS, body_bb, | 5101 set_immediate_dominator (CDI_DOMINATORS, body_bb, |
4285 recompute_dominator (CDI_DOMINATORS, body_bb)); | 5102 recompute_dominator (CDI_DOMINATORS, body_bb)); |
4286 | 5103 |
4287 if (!broken_loop) | 5104 if (!broken_loop) |
4288 { | 5105 { |
4289 struct loop *loop = body_bb->loop_father; | 5106 class loop *loop = body_bb->loop_father; |
4290 struct loop *trip_loop = alloc_loop (); | 5107 class loop *trip_loop = alloc_loop (); |
4291 trip_loop->header = iter_part_bb; | 5108 trip_loop->header = iter_part_bb; |
4292 trip_loop->latch = trip_update_bb; | 5109 trip_loop->latch = trip_update_bb; |
4293 add_loop (trip_loop, iter_part_bb->loop_father); | 5110 add_loop (trip_loop, iter_part_bb->loop_father); |
4294 | 5111 |
4295 if (loop != entry_bb->loop_father) | 5112 if (loop != entry_bb->loop_father) |
4383 bool broken_loop = region->cont == NULL; | 5200 bool broken_loop = region->cont == NULL; |
4384 edge e, ne; | 5201 edge e, ne; |
4385 tree *counts = NULL; | 5202 tree *counts = NULL; |
4386 int i; | 5203 int i; |
4387 int safelen_int = INT_MAX; | 5204 int safelen_int = INT_MAX; |
5205 bool dont_vectorize = false; | |
4388 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | 5206 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), |
4389 OMP_CLAUSE_SAFELEN); | 5207 OMP_CLAUSE_SAFELEN); |
4390 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | 5208 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), |
4391 OMP_CLAUSE__SIMDUID_); | 5209 OMP_CLAUSE__SIMDUID_); |
5210 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | |
5211 OMP_CLAUSE_IF); | |
5212 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | |
5213 OMP_CLAUSE_SIMDLEN); | |
5214 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), | |
5215 OMP_CLAUSE__CONDTEMP_); | |
4392 tree n1, n2; | 5216 tree n1, n2; |
5217 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE; | |
4393 | 5218 |
4394 if (safelen) | 5219 if (safelen) |
4395 { | 5220 { |
4396 poly_uint64 val; | 5221 poly_uint64 val; |
4397 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); | 5222 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); |
4399 safelen_int = 0; | 5224 safelen_int = 0; |
4400 else | 5225 else |
4401 safelen_int = MIN (constant_lower_bound (val), INT_MAX); | 5226 safelen_int = MIN (constant_lower_bound (val), INT_MAX); |
4402 if (safelen_int == 1) | 5227 if (safelen_int == 1) |
4403 safelen_int = 0; | 5228 safelen_int = 0; |
5229 } | |
5230 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc))) | |
5231 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))) | |
5232 { | |
5233 safelen_int = 0; | |
5234 dont_vectorize = true; | |
4404 } | 5235 } |
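Per the new checks above, an IF clause that folds to false or a simdlen(1) clause zeroes safelen_int and marks the loop dont_vectorize, so it stays serial and the auto-vectorizer is told to leave it alone. For example (illustrative code, not from this file):

    void
    scale_novec (int n, float *x, float s)
    {
      /* Constant-false IF: the loop is expanded serially and is not
         auto-vectorized.  */
      #pragma omp simd if(0)
      for (int i = 0; i < n; i++)
        x[i] *= s;
    }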
4405 type = TREE_TYPE (fd->loop.v); | 5236 type = TREE_TYPE (fd->loop.v); |
4406 entry_bb = region->entry; | 5237 entry_bb = region->entry; |
4407 cont_bb = region->cont; | 5238 cont_bb = region->cont; |
4408 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); | 5239 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); |
4510 if (POINTER_TYPE_P (itype)) | 5341 if (POINTER_TYPE_P (itype)) |
4511 itype = signed_type_for (itype); | 5342 itype = signed_type_for (itype); |
4512 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); | 5343 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); |
4513 expand_omp_build_assign (&gsi, fd->loops[i].v, t); | 5344 expand_omp_build_assign (&gsi, fd->loops[i].v, t); |
4514 } | 5345 } |
5346 } | |
5347 if (cond_var) | |
5348 { | |
5349 if (POINTER_TYPE_P (type) | |
5350 || TREE_CODE (n1) != INTEGER_CST | |
5351 || fd->loop.cond_code != LT_EXPR | |
5352 || tree_int_cst_sgn (n1) != 1) | |
5353 expand_omp_build_assign (&gsi, cond_var, | |
5354 build_one_cst (TREE_TYPE (cond_var))); | |
5355 else | |
5356 expand_omp_build_assign (&gsi, cond_var, | |
5357 fold_convert (TREE_TYPE (cond_var), n1)); | |
4515 } | 5358 } |
4516 | 5359 |
4517 /* Remove the GIMPLE_OMP_FOR statement. */ | 5360 /* Remove the GIMPLE_OMP_FOR statement. */ |
4518 gsi_remove (&gsi, true); | 5361 gsi_remove (&gsi, true); |
4519 | 5362 |
4576 fold_convert (itype, fd->loops[i].n2)), | 5419 fold_convert (itype, fd->loops[i].n2)), |
4577 fd->loops[i].v, t); | 5420 fd->loops[i].v, t); |
4578 expand_omp_build_assign (&gsi, fd->loops[i].v, t); | 5421 expand_omp_build_assign (&gsi, fd->loops[i].v, t); |
4579 } | 5422 } |
4580 } | 5423 } |
5424 if (cond_var) | |
5425 { | |
5426 if (POINTER_TYPE_P (type) | |
5427 || TREE_CODE (n1) != INTEGER_CST | |
5428 || fd->loop.cond_code != LT_EXPR | |
5429 || tree_int_cst_sgn (n1) != 1) | |
5430 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, | |
5431 build_one_cst (TREE_TYPE (cond_var))); | |
5432 else | |
5433 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, | |
5434 fold_convert (TREE_TYPE (cond_var), step)); | |
5435 expand_omp_build_assign (&gsi, cond_var, t); | |
5436 } | |
4581 | 5437 |
4582 /* Remove GIMPLE_OMP_CONTINUE. */ | 5438 /* Remove GIMPLE_OMP_CONTINUE. */ |
4583 gsi_remove (&gsi, true); | 5439 gsi_remove (&gsi, true); |
4584 } | 5440 } |
4585 | 5441 |
4664 } | 5520 } |
4665 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); | 5521 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); |
4666 | 5522 |
4667 if (!broken_loop) | 5523 if (!broken_loop) |
4668 { | 5524 { |
4669 struct loop *loop = alloc_loop (); | 5525 class loop *loop = alloc_loop (); |
4670 loop->header = l1_bb; | 5526 loop->header = l1_bb; |
4671 loop->latch = cont_bb; | 5527 loop->latch = cont_bb; |
4672 add_loop (loop, l1_bb->loop_father); | 5528 add_loop (loop, l1_bb->loop_father); |
4673 loop->safelen = safelen_int; | 5529 loop->safelen = safelen_int; |
4674 if (simduid) | 5530 if (simduid) |
4682 || !global_options_set.x_flag_tree_loop_vectorize) | 5538 || !global_options_set.x_flag_tree_loop_vectorize) |
4683 && flag_tree_loop_optimize | 5539 && flag_tree_loop_optimize |
4684 && loop->safelen > 1) | 5540 && loop->safelen > 1) |
4685 { | 5541 { |
4686 loop->force_vectorize = true; | 5542 loop->force_vectorize = true; |
5543 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))) | |
5544 { | |
5545 unsigned HOST_WIDE_INT v | |
5546 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)); | |
5547 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen) | |
5548 loop->simdlen = v; | |
5549 } | |
4687 cfun->has_force_vectorize_loops = true; | 5550 cfun->has_force_vectorize_loops = true; |
4688 } | 5551 } |
5552 else if (dont_vectorize) | |
5553 loop->dont_vectorize = true; | |
4689 } | 5554 } |
4690 else if (simduid) | 5555 else if (simduid) |
4691 cfun->has_simduid_loops = true; | 5556 cfun->has_simduid_loops = true; |
4692 } | 5557 } |
4693 | 5558 |
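When the loop is force-vectorized, a constant SIMDLEN no larger than the recorded safelen is now copied into loop->simdlen as the preferred vectorization factor. An illustrative loop exercising that (saxpy and its parameters are made-up names):

    void
    saxpy (int n, float *restrict y, const float *restrict x, float a)
    {
      /* safelen(16) bounds the dependence distance; simdlen(8), being
         no larger, ends up in loop->simdlen.  */
      #pragma omp simd safelen(16) simdlen(8)
      for (int i = 0; i < n; i++)
        y[i] = a * x[i] + y[i];
    }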
5076 set_immediate_dominator (CDI_DOMINATORS, fin_bb, | 5941 set_immediate_dominator (CDI_DOMINATORS, fin_bb, |
5077 recompute_dominator (CDI_DOMINATORS, fin_bb)); | 5942 recompute_dominator (CDI_DOMINATORS, fin_bb)); |
5078 | 5943 |
5079 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) | 5944 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) |
5080 { | 5945 { |
5081 struct loop *loop = alloc_loop (); | 5946 class loop *loop = alloc_loop (); |
5082 loop->header = body_bb; | 5947 loop->header = body_bb; |
5083 if (collapse_bb == NULL) | 5948 if (collapse_bb == NULL) |
5084 loop->latch = cont_bb; | 5949 loop->latch = cont_bb; |
5085 add_loop (loop, body_bb->loop_father); | 5950 add_loop (loop, body_bb->loop_father); |
5086 } | 5951 } |
5593 | 6458 |
5594 if (cont_bb) | 6459 if (cont_bb) |
5595 { | 6460 { |
5596 /* We now have one, two or three nested loops. Update the loop | 6461 /* We now have one, two or three nested loops. Update the loop |
5597 structures. */ | 6462 structures. */ |
5598 struct loop *parent = entry_bb->loop_father; | 6463 class loop *parent = entry_bb->loop_father; |
5599 struct loop *body = body_bb->loop_father; | 6464 class loop *body = body_bb->loop_father; |
5600 | 6465 |
5601 if (chunking) | 6466 if (chunking) |
5602 { | 6467 { |
5603 struct loop *chunk_loop = alloc_loop (); | 6468 class loop *chunk_loop = alloc_loop (); |
5604 chunk_loop->header = head_bb; | 6469 chunk_loop->header = head_bb; |
5605 chunk_loop->latch = bottom_bb; | 6470 chunk_loop->latch = bottom_bb; |
5606 add_loop (chunk_loop, parent); | 6471 add_loop (chunk_loop, parent); |
5607 parent = chunk_loop; | 6472 parent = chunk_loop; |
5608 } | 6473 } |
5614 parent = NULL; | 6479 parent = NULL; |
5615 } | 6480 } |
5616 | 6481 |
5617 if (parent) | 6482 if (parent) |
5618 { | 6483 { |
5619 struct loop *body_loop = alloc_loop (); | 6484 class loop *body_loop = alloc_loop (); |
5620 body_loop->header = body_bb; | 6485 body_loop->header = body_bb; |
5621 body_loop->latch = cont_bb; | 6486 body_loop->latch = cont_bb; |
5622 add_loop (body_loop, parent); | 6487 add_loop (body_loop, parent); |
5623 | 6488 |
5624 if (fd->tiling) | 6489 if (fd->tiling) |
5625 { | 6490 { |
5626 /* Insert tiling's element loop. */ | 6491 /* Insert tiling's element loop. */ |
5627 struct loop *inner_loop = alloc_loop (); | 6492 class loop *inner_loop = alloc_loop (); |
5628 inner_loop->header = elem_body_bb; | 6493 inner_loop->header = elem_body_bb; |
5629 inner_loop->latch = elem_cont_bb; | 6494 inner_loop->latch = elem_cont_bb; |
5630 add_loop (inner_loop, body_loop); | 6495 add_loop (inner_loop, body_loop); |
5631 } | 6496 } |
5632 } | 6497 } |
5647 * sizeof (struct omp_for_data_loop)); | 6512 * sizeof (struct omp_for_data_loop)); |
5648 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), | 6513 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), |
5649 &fd, loops); | 6514 &fd, loops); |
5650 region->sched_kind = fd.sched_kind; | 6515 region->sched_kind = fd.sched_kind; |
5651 region->sched_modifiers = fd.sched_modifiers; | 6516 region->sched_modifiers = fd.sched_modifiers; |
6517 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0; | |
5652 | 6518 |
5653 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); | 6519 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); |
5654 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; | 6520 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; |
5655 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; | 6521 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; |
5656 if (region->cont) | 6522 if (region->cont) |
5663 /* If there isn't a continue then this is a degenerate case where | 6529 /* If there isn't a continue then this is a degenerate case where |
5664 the introduction of abnormal edges during lowering will prevent | 6530 the introduction of abnormal edges during lowering will prevent |
5665 original loops from being detected. Fix that up. */ | 6531 original loops from being detected. Fix that up. */ |
5666 loops_state_set (LOOPS_NEED_FIXUP); | 6532 loops_state_set (LOOPS_NEED_FIXUP); |
5667 | 6533 |
5668 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) | 6534 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD) |
5669 expand_omp_simd (region, &fd); | 6535 expand_omp_simd (region, &fd); |
5670 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) | 6536 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) |
5671 { | 6537 { |
5672 gcc_assert (!inner_stmt); | 6538 gcc_assert (!inner_stmt); |
5673 expand_oacc_for (region, &fd); | 6539 expand_oacc_for (region, &fd); |
5688 expand_omp_for_static_chunk (region, &fd, inner_stmt); | 6554 expand_omp_for_static_chunk (region, &fd, inner_stmt); |
5689 } | 6555 } |
5690 else | 6556 else |
5691 { | 6557 { |
5692 int fn_index, start_ix, next_ix; | 6558 int fn_index, start_ix, next_ix; |
6559 unsigned HOST_WIDE_INT sched = 0; | |
6560 tree sched_arg = NULL_TREE; | |
5693 | 6561 |
5694 gcc_assert (gimple_omp_for_kind (fd.for_stmt) | 6562 gcc_assert (gimple_omp_for_kind (fd.for_stmt) |
5695 == GF_OMP_FOR_KIND_FOR); | 6563 == GF_OMP_FOR_KIND_FOR); |
5696 if (fd.chunk_size == NULL | 6564 if (fd.chunk_size == NULL |
5697 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) | 6565 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) |
5698 fd.chunk_size = integer_zero_node; | 6566 fd.chunk_size = integer_zero_node; |
5699 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); | |
5700 switch (fd.sched_kind) | 6567 switch (fd.sched_kind) |
5701 { | 6568 { |
5702 case OMP_CLAUSE_SCHEDULE_RUNTIME: | 6569 case OMP_CLAUSE_SCHEDULE_RUNTIME: |
5703 fn_index = 3; | 6570 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0 |
6571 && fd.lastprivate_conditional == 0) | |
6572 { | |
6573 gcc_assert (!fd.have_ordered); | |
6574 fn_index = 6; | |
6575 sched = 4; | |
6576 } | |
6577 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 | |
6578 && !fd.have_ordered | |
6579 && fd.lastprivate_conditional == 0) | |
6580 fn_index = 7; | |
6581 else | |
6582 { | |
6583 fn_index = 3; | |
6584 sched = (HOST_WIDE_INT_1U << 31); | |
6585 } | |
5704 break; | 6586 break; |
5705 case OMP_CLAUSE_SCHEDULE_DYNAMIC: | 6587 case OMP_CLAUSE_SCHEDULE_DYNAMIC: |
5706 case OMP_CLAUSE_SCHEDULE_GUIDED: | 6588 case OMP_CLAUSE_SCHEDULE_GUIDED: |
5707 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) | 6589 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 |
5708 && !fd.ordered | 6590 && !fd.have_ordered |
5709 && !fd.have_ordered) | 6591 && fd.lastprivate_conditional == 0) |
5710 { | 6592 { |
5711 fn_index = 3 + fd.sched_kind; | 6593 fn_index = 3 + fd.sched_kind; |
6594 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; | |
5712 break; | 6595 break; |
5713 } | 6596 } |
5714 /* FALLTHRU */ | 6597 fn_index = fd.sched_kind; |
6598 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; | |
6599 sched += (HOST_WIDE_INT_1U << 31); | |
6600 break; | |
6601 case OMP_CLAUSE_SCHEDULE_STATIC: | |
6602 gcc_assert (fd.have_ordered); | |
6603 fn_index = 0; | |
6604 sched = (HOST_WIDE_INT_1U << 31) + 1; | |
6605 break; | |
5715 default: | 6606 default: |
5716 fn_index = fd.sched_kind; | 6607 gcc_unreachable (); |
5717 break; | |
5718 } | 6608 } |
5719 if (!fd.ordered) | 6609 if (!fd.ordered) |
5720 fn_index += fd.have_ordered * 6; | 6610 fn_index += fd.have_ordered * 8; |
5721 if (fd.ordered) | 6611 if (fd.ordered) |
5722 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; | 6612 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; |
5723 else | 6613 else |
5724 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; | 6614 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; |
5725 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; | 6615 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; |
6616 if (fd.have_reductemp || fd.have_pointer_condtemp) | |
6617 { | |
6618 if (fd.ordered) | |
6619 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START; | |
6620 else if (fd.have_ordered) | |
6621 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START; | |
6622 else | |
6623 start_ix = (int)BUILT_IN_GOMP_LOOP_START; | |
6624 sched_arg = build_int_cstu (long_integer_type_node, sched); | |
6625 if (!fd.chunk_size) | |
6626 fd.chunk_size = integer_zero_node; | |
6627 } | |
5726 if (fd.iter_type == long_long_unsigned_type_node) | 6628 if (fd.iter_type == long_long_unsigned_type_node) |
5727 { | 6629 { |
5728 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START | 6630 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START |
5729 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); | 6631 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); |
5730 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT | 6632 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT |
5731 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); | 6633 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); |
5732 } | 6634 } |
5733 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, | 6635 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, |
5734 (enum built_in_function) next_ix, inner_stmt); | 6636 (enum built_in_function) next_ix, sched_arg, |
6637 inner_stmt); | |
5735 } | 6638 } |
5736 | 6639 |
5737 if (gimple_in_ssa_p (cfun)) | 6640 if (gimple_in_ssa_p (cfun)) |
5738 update_ssa (TODO_update_ssa_only_virtuals); | 6641 update_ssa (TODO_update_ssa_only_virtuals); |
5739 } | 6642 } |
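The reworked schedule switch above folds the monotonic/nonmonotonic modifiers into both the fn_index used to pick the libgomp entry point and the sched value (bit 31 acting as the monotonic flag) passed via sched_arg when a reductemp or conditional-lastprivate buffer forces the GOMP_loop_start family. Two illustrative loops and, per that switch, the branch each takes when there is no ordered clause and no conditional lastprivate:

    void
    sched_examples (int n, double *x)
    {
      /* Monotonic bit clear: takes the nonmonotonic dynamic branch
         (fn_index = 3 + sched_kind).  */
      #pragma omp parallel for schedule(nonmonotonic: dynamic, 64)
      for (int i = 0; i < n; i++)
        x[i] += 1.0;

      /* Explicit monotonic modifier: keeps the classic guided entry
         point and sets the high bit in sched instead.  */
      #pragma omp parallel for schedule(monotonic: guided)
      for (int i = 0; i < n; i++)
        x[i] *= 2.0;
    }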
5829 GIMPLE_OMP_SECTIONS statement. */ | 6732 GIMPLE_OMP_SECTIONS statement. */ |
5830 si = gsi_last_nondebug_bb (entry_bb); | 6733 si = gsi_last_nondebug_bb (entry_bb); |
5831 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); | 6734 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); |
5832 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); | 6735 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); |
5833 vin = gimple_omp_sections_control (sections_stmt); | 6736 vin = gimple_omp_sections_control (sections_stmt); |
5834 if (!is_combined_parallel (region)) | 6737 tree clauses = gimple_omp_sections_clauses (sections_stmt); |
6738 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); | |
6739 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); | |
6740 tree cond_var = NULL_TREE; | |
6741 if (reductmp || condtmp) | |
6742 { | |
6743 tree reductions = null_pointer_node, mem = null_pointer_node; | |
6744 tree memv = NULL_TREE, condtemp = NULL_TREE; | |
6745 gimple_stmt_iterator gsi = gsi_none (); | |
6746 gimple *g = NULL; | |
6747 if (reductmp) | |
6748 { | |
6749 reductions = OMP_CLAUSE_DECL (reductmp); | |
6750 gcc_assert (TREE_CODE (reductions) == SSA_NAME); | |
6751 g = SSA_NAME_DEF_STMT (reductions); | |
6752 reductions = gimple_assign_rhs1 (g); | |
6753 OMP_CLAUSE_DECL (reductmp) = reductions; | |
6754 gsi = gsi_for_stmt (g); | |
6755 } | |
6756 else | |
6757 gsi = si; | |
6758 if (condtmp) | |
6759 { | |
6760 condtemp = OMP_CLAUSE_DECL (condtmp); | |
6761 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp), | |
6762 OMP_CLAUSE__CONDTEMP_); | |
6763 cond_var = OMP_CLAUSE_DECL (c); | |
6764 tree type = TREE_TYPE (condtemp); | |
6765 memv = create_tmp_var (type); | |
6766 TREE_ADDRESSABLE (memv) = 1; | |
6767 unsigned cnt = 0; | |
6768 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) | |
6769 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE | |
6770 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) | |
6771 ++cnt; | |
6772 unsigned HOST_WIDE_INT sz | |
6773 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt; | |
6774 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), | |
6775 false); | |
6776 mem = build_fold_addr_expr (memv); | |
6777 } | |
6778 t = build_int_cst (unsigned_type_node, len - 1); | |
6779 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START); | |
6780 stmt = gimple_build_call (u, 3, t, reductions, mem); | |
6781 gimple_call_set_lhs (stmt, vin); | |
6782 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); | |
6783 if (condtmp) | |
6784 { | |
6785 expand_omp_build_assign (&gsi, condtemp, memv, false); | |
6786 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), | |
6787 vin, build_one_cst (TREE_TYPE (cond_var))); | |
6788 expand_omp_build_assign (&gsi, cond_var, t, false); | |
6789 } | |
6790 if (reductmp) | |
6791 { | |
6792 gsi_remove (&gsi, true); | |
6793 release_ssa_name (gimple_assign_lhs (g)); | |
6794 } | |
6795 } | |
6796 else if (!is_combined_parallel (region)) | |
5835 { | 6797 { |
5836 /* If we are not inside a combined parallel+sections region, | 6798 /* If we are not inside a combined parallel+sections region, |
5837 call GOMP_sections_start. */ | 6799 call GOMP_sections_start. */ |
5838 t = build_int_cst (unsigned_type_node, len - 1); | 6800 t = build_int_cst (unsigned_type_node, len - 1); |
5839 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); | 6801 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); |
5843 { | 6805 { |
5844 /* Otherwise, call GOMP_sections_next. */ | 6806 /* Otherwise, call GOMP_sections_next. */ |
5845 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); | 6807 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); |
5846 stmt = gimple_build_call (u, 0); | 6808 stmt = gimple_build_call (u, 0); |
5847 } | 6809 } |
5848 gimple_call_set_lhs (stmt, vin); | 6810 if (!reductmp && !condtmp) |
5849 gsi_insert_after (&si, stmt, GSI_SAME_STMT); | 6811 { |
6812 gimple_call_set_lhs (stmt, vin); | |
6813 gsi_insert_after (&si, stmt, GSI_SAME_STMT); | |
6814 } | |
5850 gsi_remove (&si, true); | 6815 gsi_remove (&si, true); |
5851 | 6816 |
5852 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in | 6817 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in |
5853 L0_BB. */ | 6818 L0_BB. */ |
5854 switch_si = gsi_last_nondebug_bb (l0_bb); | 6819 switch_si = gsi_last_nondebug_bb (l0_bb); |
5932 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); | 6897 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); |
5933 | 6898 |
5934 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); | 6899 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); |
5935 stmt = gimple_build_call (bfn_decl, 0); | 6900 stmt = gimple_build_call (bfn_decl, 0); |
5936 gimple_call_set_lhs (stmt, vnext); | 6901 gimple_call_set_lhs (stmt, vnext); |
5937 gsi_insert_after (&si, stmt, GSI_SAME_STMT); | 6902 gsi_insert_before (&si, stmt, GSI_SAME_STMT); |
6903 if (cond_var) | |
6904 { | |
6905 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), | |
6906 vnext, build_one_cst (TREE_TYPE (cond_var))); | |
6907 expand_omp_build_assign (&si, cond_var, t, false); | |
6908 } | |
5938 gsi_remove (&si, true); | 6909 gsi_remove (&si, true); |
5939 | 6910 |
5940 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; | 6911 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; |
5941 } | 6912 } |
5942 | 6913 |
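In the sections expansion, the lastprivate(conditional:) counter can reuse the section number itself: cond_var is assigned the GOMP_sections2_start / GOMP_sections_next result plus one, so an assignment in a later section outranks one in an earlier section. A hedged example, assuming a conditional lastprivate is accepted on sections (all names illustrative):

    void
    pick_last (int a, int b, int *out)
    {
      int last = 0;
      #pragma omp parallel sections lastprivate(conditional: last)
      {
        #pragma omp section
        { if (a) last = 1; }
        #pragma omp section
        { if (b) last = 2; }
      }
      *out = last;
    }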
6002 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER | 6973 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER |
6003 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP | 6974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP |
6004 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED | 6975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED |
6005 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL | 6976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL |
6006 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); | 6977 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); |
6978 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS | |
6979 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si)))) | |
6980 { | |
6981 expand_omp_taskreg (region); | |
6982 return; | |
6983 } | |
6007 gsi_remove (&si, true); | 6984 gsi_remove (&si, true); |
6008 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; | 6985 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; |
6009 | 6986 |
6010 if (exit_bb) | 6987 if (exit_bb) |
6011 { | 6988 { |
6012 si = gsi_last_nondebug_bb (exit_bb); | 6989 si = gsi_last_nondebug_bb (exit_bb); |
6013 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); | 6990 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); |
6014 gsi_remove (&si, true); | 6991 gsi_remove (&si, true); |
6015 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; | 6992 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; |
6993 } | |
6994 } | |
6995 | |
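The GIMPLE_OMP_TEAMS check added above covers host teams, i.e. a teams construct that is not nested in a target region: such a region is outlined via expand_omp_taskreg, much like a parallel, rather than taking the offload path. An illustrative host teams loop, assuming OpenMP 5.0 (host_teams, n and x are made-up names):

    void
    host_teams (int n, double *x)
    {
      #pragma omp teams distribute parallel for num_teams(4)
      for (int i = 0; i < n; i++)
        x[i] = 0.0;
    }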
6996 /* Translate enum omp_memory_order to enum memmodel. The two enums | |
6997 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED | |
6998 is 0. */ | |
6999 | |
7000 static enum memmodel | |
7001 omp_memory_order_to_memmodel (enum omp_memory_order mo) | |
7002 { | |
7003 switch (mo) | |
7004 { | |
7005 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED; | |
7006 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE; | |
7007 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE; | |
7008 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL; | |
7009 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST; | |
7010 default: gcc_unreachable (); | |
6016 } | 7011 } |
6017 } | 7012 } |
6018 | 7013 |
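omp_memory_order_to_memmodel lets the atomic expanders below honor the OpenMP 5.0 memory-order clauses instead of the old seq_cst-versus-relaxed choice: the order is read off the GIMPLE_OMP_ATOMIC_LOAD/STORE statement and forwarded as the memmodel argument of the __atomic_* builtin call. Illustrative directives and the memmodel each maps to, per the switch above:

    void
    atomic_examples (int *p, int *q, int v)
    {
      int tmp;

      #pragma omp atomic read acquire     /* MEMMODEL_ACQUIRE */
      tmp = *p;

      #pragma omp atomic write release    /* MEMMODEL_RELEASE */
      *q = v;

      #pragma omp atomic update seq_cst   /* MEMMODEL_SEQ_CST */
      *p = *p + tmp;
    }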
6019 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic | 7014 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic |
6020 operation as a normal volatile load. */ | 7015 operation as a normal volatile load. */ |
6045 return false; | 7040 return false; |
6046 | 7041 |
6047 type = TREE_TYPE (loaded_val); | 7042 type = TREE_TYPE (loaded_val); |
6048 itype = TREE_TYPE (TREE_TYPE (decl)); | 7043 itype = TREE_TYPE (TREE_TYPE (decl)); |
6049 | 7044 |
6050 call = build_call_expr_loc (loc, decl, 2, addr, | 7045 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); |
6051 build_int_cst (NULL, | 7046 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); |
6052 gimple_omp_atomic_seq_cst_p (stmt) | 7047 call = build_call_expr_loc (loc, decl, 2, addr, mo); |
6053 ? MEMMODEL_SEQ_CST | |
6054 : MEMMODEL_RELAXED)); | |
6055 if (!useless_type_conversion_p (type, itype)) | 7048 if (!useless_type_conversion_p (type, itype)) |
6056 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); | 7049 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); |
6057 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); | 7050 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); |
6058 | 7051 |
6059 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); | 7052 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); |
6120 if (exchange && !can_atomic_exchange_p (imode, true)) | 7113 if (exchange && !can_atomic_exchange_p (imode, true)) |
6121 return false; | 7114 return false; |
6122 | 7115 |
6123 if (!useless_type_conversion_p (itype, type)) | 7116 if (!useless_type_conversion_p (itype, type)) |
6124 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); | 7117 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); |
6125 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, | 7118 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt); |
6126 build_int_cst (NULL, | 7119 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo)); |
6127 gimple_omp_atomic_seq_cst_p (stmt) | 7120 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo); |
6128 ? MEMMODEL_SEQ_CST | |
6129 : MEMMODEL_RELAXED)); | |
6130 if (exchange) | 7121 if (exchange) |
6131 { | 7122 { |
6132 if (!useless_type_conversion_p (type, itype)) | 7123 if (!useless_type_conversion_p (type, itype)) |
6133 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); | 7124 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); |
6134 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); | 7125 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); |
6165 gimple *stmt; | 7156 gimple *stmt; |
6166 location_t loc; | 7157 location_t loc; |
6167 enum tree_code code; | 7158 enum tree_code code; |
6168 bool need_old, need_new; | 7159 bool need_old, need_new; |
6169 machine_mode imode; | 7160 machine_mode imode; |
6170 bool seq_cst; | |
6171 | 7161 |
6172 /* We expect to find the following sequences: | 7162 /* We expect to find the following sequences: |
6173 | 7163 |
6174 load_bb: | 7164 load_bb: |
6175 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) | 7165 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) |
6198 gsi_next_nondebug (&gsi); | 7188 gsi_next_nondebug (&gsi); |
6199 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) | 7189 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) |
6200 return false; | 7190 return false; |
6201 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); | 7191 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); |
6202 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); | 7192 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); |
6203 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb)); | 7193 enum omp_memory_order omo |
7194 = gimple_omp_atomic_memory_order (last_stmt (load_bb)); | |
7195 enum memmodel mo = omp_memory_order_to_memmodel (omo); | |
6204 gcc_checking_assert (!need_old || !need_new); | 7196 gcc_checking_assert (!need_old || !need_new); |
6205 | 7197 |
6206 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) | 7198 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) |
6207 return false; | 7199 return false; |
6208 | 7200 |
6265 /* OpenMP does not imply any barrier-like semantics on its atomic ops. | 7257 /* OpenMP does not imply any barrier-like semantics on its atomic ops. |
6266 It only requires that the operation happen atomically. Thus we can | 7258 It only requires that the operation happen atomically. Thus we can |
6267 use the RELAXED memory model. */ | 7259 use the RELAXED memory model. */ |
6268 call = build_call_expr_loc (loc, decl, 3, addr, | 7260 call = build_call_expr_loc (loc, decl, 3, addr, |
6269 fold_convert_loc (loc, itype, rhs), | 7261 fold_convert_loc (loc, itype, rhs), |
6270 build_int_cst (NULL, | 7262 build_int_cst (NULL, mo)); |
6271 seq_cst ? MEMMODEL_SEQ_CST | |
6272 : MEMMODEL_RELAXED)); | |
6273 | 7263 |
6274 if (need_old || need_new) | 7264 if (need_old || need_new) |
6275 { | 7265 { |
6276 lhs = need_old ? loaded_val : stored_val; | 7266 lhs = need_old ? loaded_val : stored_val; |
6277 call = fold_convert_loc (loc, TREE_TYPE (lhs), call); | 7267 call = fold_convert_loc (loc, TREE_TYPE (lhs), call); |
6482 } | 7472 } |
6483 | 7473 |
6484 /* Remove GIMPLE_OMP_ATOMIC_STORE. */ | 7474 /* Remove GIMPLE_OMP_ATOMIC_STORE. */ |
6485 gsi_remove (&si, true); | 7475 gsi_remove (&si, true); |
6486 | 7476 |
6487 struct loop *loop = alloc_loop (); | 7477 class loop *loop = alloc_loop (); |
6488 loop->header = loop_header; | 7478 loop->header = loop_header; |
6489 loop->latch = store_bb; | 7479 loop->latch = store_bb; |
6490 add_loop (loop, loop_header->loop_father); | 7480 add_loop (loop, loop_header->loop_father); |
6491 | 7481 |
6492 if (gimple_in_ssa_p (cfun)) | 7482 if (gimple_in_ssa_p (cfun)) |
6632 | 7622 |
6633 static void | 7623 static void |
6634 mark_loops_in_oacc_kernels_region (basic_block region_entry, | 7624 mark_loops_in_oacc_kernels_region (basic_block region_entry, |
6635 basic_block region_exit) | 7625 basic_block region_exit) |
6636 { | 7626 { |
6637 struct loop *outer = region_entry->loop_father; | 7627 class loop *outer = region_entry->loop_father; |
6638 gcc_assert (region_exit == NULL || outer == region_exit->loop_father); | 7628 gcc_assert (region_exit == NULL || outer == region_exit->loop_father); |
6639 | 7629 |
6640 /* Don't parallelize the kernels region if it contains more than one outer | 7630 /* Don't parallelize the kernels region if it contains more than one outer |
6641 loop. */ | 7631 loop. */ |
6642 unsigned int nr_outer_loops = 0; | 7632 unsigned int nr_outer_loops = 0; |
6643 struct loop *single_outer = NULL; | 7633 class loop *single_outer = NULL; |
6644 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next) | 7634 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next) |
6645 { | 7635 { |
6646 gcc_assert (loop_outer (loop) == outer); | 7636 gcc_assert (loop_outer (loop) == outer); |
6647 | 7637 |
6648 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) | 7638 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) |
6649 continue; | 7639 continue; |
6656 single_outer = loop; | 7646 single_outer = loop; |
6657 } | 7647 } |
6658 if (nr_outer_loops != 1) | 7648 if (nr_outer_loops != 1) |
6659 return; | 7649 return; |
6660 | 7650 |
6661 for (struct loop *loop = single_outer->inner; | 7651 for (class loop *loop = single_outer->inner; |
6662 loop != NULL; | 7652 loop != NULL; |
6663 loop = loop->inner) | 7653 loop = loop->inner) |
6664 if (loop->next) | 7654 if (loop->next) |
6665 return; | 7655 return; |
6666 | 7656 |
6667 /* Mark the loops in the region. */ | 7657 /* Mark the loops in the region. */ |
6668 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner) | 7658 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner) |
6669 loop->in_oacc_kernels_region = true; | 7659 loop->in_oacc_kernels_region = true; |
6670 } | 7660 } |
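A hedged source-level example, with hypothetical names, of a region this function will mark: exactly one outermost loop, and no loop anywhere in the nest has a sibling:

    void
    zero (int n, int m, double a[n][m])
    {
      #pragma acc kernels
      for (int i = 0; i < n; i++)      /* the single outer loop */
        for (int j = 0; j < m; j++)    /* sole inner loop, no siblings */
          a[i][j] = 0.0;
    }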
6671 | 7661 |
6672 /* Types used to pass grid and workgroup sizes to kernel invocation. */ | 7662 /* Types used to pass grid and workgroup sizes to kernel invocation. */ |
6673 | 7663 |
6909 gimple_stmt_iterator gsi; | 7899 gimple_stmt_iterator gsi; |
6910 gomp_target *entry_stmt; | 7900 gomp_target *entry_stmt; |
6911 gimple *stmt; | 7901 gimple *stmt; |
6912 edge e; | 7902 edge e; |
6913 bool offloaded, data_region; | 7903 bool offloaded, data_region; |
7904 int target_kind; | |
6914 | 7905 |
6915 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); | 7906 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); |
7907 target_kind = gimple_omp_target_kind (entry_stmt); | |
6916 new_bb = region->entry; | 7908 new_bb = region->entry; |
6917 | 7909 |
6918 offloaded = is_gimple_omp_offloaded (entry_stmt); | 7910 offloaded = is_gimple_omp_offloaded (entry_stmt); |
6919 switch (gimple_omp_target_kind (entry_stmt)) | 7911 switch (target_kind) |
6920 { | 7912 { |
6921 case GF_OMP_TARGET_KIND_REGION: | 7913 case GF_OMP_TARGET_KIND_REGION: |
6922 case GF_OMP_TARGET_KIND_UPDATE: | 7914 case GF_OMP_TARGET_KIND_UPDATE: |
6923 case GF_OMP_TARGET_KIND_ENTER_DATA: | 7915 case GF_OMP_TARGET_KIND_ENTER_DATA: |
6924 case GF_OMP_TARGET_KIND_EXIT_DATA: | 7916 case GF_OMP_TARGET_KIND_EXIT_DATA: |
6925 case GF_OMP_TARGET_KIND_OACC_PARALLEL: | 7917 case GF_OMP_TARGET_KIND_OACC_PARALLEL: |
6926 case GF_OMP_TARGET_KIND_OACC_KERNELS: | 7918 case GF_OMP_TARGET_KIND_OACC_KERNELS: |
7919 case GF_OMP_TARGET_KIND_OACC_SERIAL: | |
6927 case GF_OMP_TARGET_KIND_OACC_UPDATE: | 7920 case GF_OMP_TARGET_KIND_OACC_UPDATE: |
6928 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: | 7921 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: |
6929 case GF_OMP_TARGET_KIND_OACC_DECLARE: | 7922 case GF_OMP_TARGET_KIND_OACC_DECLARE: |
6930 data_region = false; | 7923 data_region = false; |
6931 break; | 7924 break; |
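A hedged source-level illustration of a few of the kinds classified above as non-data regions (the data-region kinds follow in the full source):

    /* #pragma omp target             -> GF_OMP_TARGET_KIND_REGION
       #pragma omp target update ...  -> GF_OMP_TARGET_KIND_UPDATE
       #pragma acc parallel           -> GF_OMP_TARGET_KIND_OACC_PARALLEL
       #pragma acc serial             -> GF_OMP_TARGET_KIND_OACC_SERIAL  */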
6952 gcc_checking_assert (!gimple_in_ssa_p (cfun)); | 7945 gcc_checking_assert (!gimple_in_ssa_p (cfun)); |
6953 | 7946 |
6954 entry_bb = region->entry; | 7947 entry_bb = region->entry; |
6955 exit_bb = region->exit; | 7948 exit_bb = region->exit; |
6956 | 7949 |
6957 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) | 7950 switch (target_kind) |
6958 { | 7951 { |
7952 case GF_OMP_TARGET_KIND_OACC_KERNELS: | |
6959 mark_loops_in_oacc_kernels_region (region->entry, region->exit); | 7953 mark_loops_in_oacc_kernels_region (region->entry, region->exit); |
6960 | 7954 |
6961 /* Further down, both OpenACC kernels and OpenACC parallel constructs | 7955 /* Further down, all OpenACC compute constructs will be mapped to |
6962 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the | 7956 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there |
6963 two, there is an "oacc kernels" attribute set for OpenACC kernels. */ | 7957 is an "oacc kernels" attribute set for OpenACC kernels. */ |
6964 DECL_ATTRIBUTES (child_fn) | 7958 DECL_ATTRIBUTES (child_fn) |
6965 = tree_cons (get_identifier ("oacc kernels"), | 7959 = tree_cons (get_identifier ("oacc kernels"), |
6966 NULL_TREE, DECL_ATTRIBUTES (child_fn)); | 7960 NULL_TREE, DECL_ATTRIBUTES (child_fn)); |
7961 break; | |
7962 case GF_OMP_TARGET_KIND_OACC_SERIAL: | |
7963 /* Further down, all OpenACC compute constructs will be mapped to | |
7964 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there | |
7965 is an "oacc serial" attribute set for OpenACC serial. */ | |
7966 DECL_ATTRIBUTES (child_fn) | |
7967 = tree_cons (get_identifier ("oacc serial"), | |
7968 NULL_TREE, DECL_ATTRIBUTES (child_fn)); | |
7969 break; | |
7970 default: | |
7971 break; | |
6967 } | 7972 } |
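A hedged source-level view of the convention set up above: all three OpenACC compute constructs launch through the same entry point, and only the attribute on the outlined child function tells them apart:

    void
    compute (void)
    {
      #pragma acc parallel     /* child fn: no extra attribute */
      { }
      #pragma acc kernels      /* child fn: "oacc kernels" attribute */
      { }
      #pragma acc serial       /* child fn: "oacc serial" attribute */
      { }
    }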
6968 | 7973 |
6969 if (offloaded) | 7974 if (offloaded) |
6970 { | 7975 { |
6971 unsigned srcidx, dstidx, num; | 7976 unsigned srcidx, dstidx, num; |
7060 stmt = gimple_build_return (NULL); | 8065 stmt = gimple_build_return (NULL); |
7061 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); | 8066 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
7062 gsi_remove (&gsi, true); | 8067 gsi_remove (&gsi, true); |
7063 } | 8068 } |
7064 | 8069 |
7065 /* Make sure to generate early debug for the function before | |
7066 outlining anything. */ | |
7067 if (! gimple_in_ssa_p (cfun)) | |
7068 (*debug_hooks->early_global_decl) (cfun->decl); | |
7069 | |
7070 /* Move the offloading region into CHILD_CFUN. */ | 8070 /* Move the offloading region into CHILD_CFUN. */ |
7071 | 8071 |
7072 block = gimple_block (entry_stmt); | 8072 block = gimple_block (entry_stmt); |
7073 | 8073 |
7074 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); | 8074 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); |
7141 { | 8141 { |
7142 omp_any_child_fn_dumped = true; | 8142 omp_any_child_fn_dumped = true; |
7143 dump_function_header (dump_file, child_fn, dump_flags); | 8143 dump_function_header (dump_file, child_fn, dump_flags); |
7144 dump_function_to_file (child_fn, dump_file, dump_flags); | 8144 dump_function_to_file (child_fn, dump_file, dump_flags); |
7145 } | 8145 } |
8146 | |
8147 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); | |
7146 } | 8148 } |
7147 | 8149 |
7148 /* Emit a library call to launch the offloading region, or do data | 8150 /* Emit a library call to launch the offloading region, or do data |
7149 transfers. */ | 8151 transfers. */ |
7150 tree t1, t2, t3, t4, device, cond, depend, c, clauses; | 8152 tree t1, t2, t3, t4, depend, c, clauses; |
7151 enum built_in_function start_ix; | 8153 enum built_in_function start_ix; |
7152 location_t clause_loc; | |
7153 unsigned int flags_i = 0; | 8154 unsigned int flags_i = 0; |
7154 | 8155 |
7155 switch (gimple_omp_target_kind (entry_stmt)) | 8156 switch (gimple_omp_target_kind (entry_stmt)) |
7156 { | 8157 { |
7157 case GF_OMP_TARGET_KIND_REGION: | 8158 case GF_OMP_TARGET_KIND_REGION: |
7168 break; | 8169 break; |
7169 case GF_OMP_TARGET_KIND_EXIT_DATA: | 8170 case GF_OMP_TARGET_KIND_EXIT_DATA: |
7170 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; | 8171 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; |
7171 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; | 8172 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; |
7172 break; | 8173 break; |
8174 case GF_OMP_TARGET_KIND_OACC_PARALLEL: | |
7173 case GF_OMP_TARGET_KIND_OACC_KERNELS: | 8175 case GF_OMP_TARGET_KIND_OACC_KERNELS: |
7174 case GF_OMP_TARGET_KIND_OACC_PARALLEL: | 8176 case GF_OMP_TARGET_KIND_OACC_SERIAL: |
7175 start_ix = BUILT_IN_GOACC_PARALLEL; | 8177 start_ix = BUILT_IN_GOACC_PARALLEL; |
7176 break; | 8178 break; |
7177 case GF_OMP_TARGET_KIND_OACC_DATA: | 8179 case GF_OMP_TARGET_KIND_OACC_DATA: |
7178 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: | 8180 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: |
7179 start_ix = BUILT_IN_GOACC_DATA_START; | 8181 start_ix = BUILT_IN_GOACC_DATA_START; |
7191 gcc_unreachable (); | 8193 gcc_unreachable (); |
7192 } | 8194 } |
7193 | 8195 |
7194 clauses = gimple_omp_target_clauses (entry_stmt); | 8196 clauses = gimple_omp_target_clauses (entry_stmt); |
7195 | 8197 |
7196 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime | 8198 tree device = NULL_TREE; |
7197 library choose) and there is no conditional. */ | 8199 location_t device_loc = UNKNOWN_LOCATION; |
7198 cond = NULL_TREE; | 8200 tree goacc_flags = NULL_TREE; |
7199 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); | 8201 if (is_gimple_omp_oacc (entry_stmt)) |
7200 | 8202 { |
8203 /* By default, no GOACC_FLAGs are set. */ | |
8204 goacc_flags = integer_zero_node; | |
8205 } | |
8206 else | |
8207 { | |
8208 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); | |
8209 if (c) | |
8210 { | |
8211 device = OMP_CLAUSE_DEVICE_ID (c); | |
8212 device_loc = OMP_CLAUSE_LOCATION (c); | |
8213 } | |
8214 else | |
8215 { | |
8216 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime | |
8217 library choose). */ | |
8218 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); | |
8219 device_loc = gimple_location (entry_stmt); | |
8220 } | |
8221 | |
8222 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); | |
8223 if (c) | |
8224 flags_i |= GOMP_TARGET_FLAG_NOWAIT; | |
8225 } | |
8226 | |
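A hedged source-level example of the OpenMP clauses scanned above (the device number is arbitrary):

    int x;

    void
    launch (void)
    {
      /* device(2) supplies DEVICE; nowait sets GOMP_TARGET_FLAG_NOWAIT.
         Without a device clause, GOMP_DEVICE_ICV lets the runtime choose.  */
      #pragma omp target device(2) nowait map(tofrom: x)
      x += 1;
      #pragma omp taskwait
    }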
8227 /* By default, there is no conditional. */ | |
8228 tree cond = NULL_TREE; | |
7201 c = omp_find_clause (clauses, OMP_CLAUSE_IF); | 8229 c = omp_find_clause (clauses, OMP_CLAUSE_IF); |
7202 if (c) | 8230 if (c) |
7203 cond = OMP_CLAUSE_IF_EXPR (c); | 8231 cond = OMP_CLAUSE_IF_EXPR (c); |
7204 | 8232 /* If we found the clause 'if (cond)', build: |
7205 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); | 8233 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK) |
7206 if (c) | 8234 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */ |
7207 { | |
7208 /* Even if we pass it to all library function calls, it is currently only | |
7209 defined/used for the OpenMP target ones. */ | |
7210 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET | |
7211 || start_ix == BUILT_IN_GOMP_TARGET_DATA | |
7212 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE | |
7213 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA); | |
7214 | |
7215 device = OMP_CLAUSE_DEVICE_ID (c); | |
7216 clause_loc = OMP_CLAUSE_LOCATION (c); | |
7217 } | |
7218 else | |
7219 clause_loc = gimple_location (entry_stmt); | |
7220 | |
7221 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); | |
7222 if (c) | |
7223 flags_i |= GOMP_TARGET_FLAG_NOWAIT; | |
7224 | |
7225 /* Ensure 'device' is of the correct type. */ | |
7226 device = fold_convert_loc (clause_loc, integer_type_node, device); | |
7227 | |
7228 /* If we found the clause 'if (cond)', build | |
7229 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */ | |
7230 if (cond) | 8235 if (cond) |
7231 { | 8236 { |
8237 tree *tp; | |
8238 if (is_gimple_omp_oacc (entry_stmt)) | |
8239 tp = &goacc_flags; | |
8240 else | |
8241 { | |
8242 /* Ensure 'device' is of the correct type. */ | |
8243 device = fold_convert_loc (device_loc, integer_type_node, device); | |
8244 | |
8245 tp = &device; | |
8246 } | |
8247 | |
7232 cond = gimple_boolify (cond); | 8248 cond = gimple_boolify (cond); |
7233 | 8249 |
7234 basic_block cond_bb, then_bb, else_bb; | 8250 basic_block cond_bb, then_bb, else_bb; |
7235 edge e; | 8251 edge e; |
7236 tree tmp_var; | 8252 tree tmp_var; |
7237 | 8253 |
7238 tmp_var = create_tmp_var (TREE_TYPE (device)); | 8254 tmp_var = create_tmp_var (TREE_TYPE (*tp)); |
7239 if (offloaded) | 8255 if (offloaded) |
7240 e = split_block_after_labels (new_bb); | 8256 e = split_block_after_labels (new_bb); |
7241 else | 8257 else |
7242 { | 8258 { |
7243 gsi = gsi_last_nondebug_bb (new_bb); | 8259 gsi = gsi_last_nondebug_bb (new_bb); |
7256 stmt = gimple_build_cond_empty (cond); | 8272 stmt = gimple_build_cond_empty (cond); |
7257 gsi = gsi_last_bb (cond_bb); | 8273 gsi = gsi_last_bb (cond_bb); |
7258 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); | 8274 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
7259 | 8275 |
7260 gsi = gsi_start_bb (then_bb); | 8276 gsi = gsi_start_bb (then_bb); |
7261 stmt = gimple_build_assign (tmp_var, device); | 8277 stmt = gimple_build_assign (tmp_var, *tp); |
7262 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); | 8278 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
7263 | 8279 |
7264 gsi = gsi_start_bb (else_bb); | 8280 gsi = gsi_start_bb (else_bb); |
7265 stmt = gimple_build_assign (tmp_var, | 8281 if (is_gimple_omp_oacc (entry_stmt)) |
7266 build_int_cst (integer_type_node, | 8282 stmt = gimple_build_assign (tmp_var, |
7267 GOMP_DEVICE_HOST_FALLBACK)); | 8283 BIT_IOR_EXPR, |
8284 *tp, | |
8285 build_int_cst (integer_type_node, | |
8286 GOACC_FLAG_HOST_FALLBACK)); | |
8287 else | |
8288 stmt = gimple_build_assign (tmp_var, | |
8289 build_int_cst (integer_type_node, | |
8290 GOMP_DEVICE_HOST_FALLBACK)); | |
7268 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); | 8291 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
7269 | 8292 |
7270 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); | 8293 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); |
7271 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); | 8294 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); |
7272 add_bb_to_loop (then_bb, cond_bb->loop_father); | 8295 add_bb_to_loop (then_bb, cond_bb->loop_father); |
7273 add_bb_to_loop (else_bb, cond_bb->loop_father); | 8296 add_bb_to_loop (else_bb, cond_bb->loop_father); |
7274 make_edge (then_bb, new_bb, EDGE_FALLTHRU); | 8297 make_edge (then_bb, new_bb, EDGE_FALLTHRU); |
7275 make_edge (else_bb, new_bb, EDGE_FALLTHRU); | 8298 make_edge (else_bb, new_bb, EDGE_FALLTHRU); |
7276 | 8299 |
7277 device = tmp_var; | 8300 *tp = tmp_var; |
8301 | |
7278 gsi = gsi_last_nondebug_bb (new_bb); | 8302 gsi = gsi_last_nondebug_bb (new_bb); |
7279 } | 8303 } |
7280 else | 8304 else |
7281 { | 8305 { |
7282 gsi = gsi_last_nondebug_bb (new_bb); | 8306 gsi = gsi_last_nondebug_bb (new_bb); |
7283 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, | 8307 |
7284 true, GSI_SAME_STMT); | 8308 if (device != NULL_TREE) |
8309 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, | |
8310 true, GSI_SAME_STMT); | |
7285 } | 8311 } |
7286 | 8312 |
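A hedged sketch of the diamond built above when an 'if' clause is present (tmp_var selects either the normal launch value or the host fallback):

    /* cond_bb:  if (cond) goto then_bb; else goto else_bb;
       then_bb:  tmp = device;                                  (OpenMP)
                 tmp = goacc_flags;                             (OpenACC)
       else_bb:  tmp = GOMP_DEVICE_HOST_FALLBACK;               (OpenMP)
                 tmp = goacc_flags | GOACC_FLAG_HOST_FALLBACK;  (OpenACC)
       new_bb:   ...tmp feeds the launch call...  */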
7287 t = gimple_omp_target_data_arg (entry_stmt); | 8313 t = gimple_omp_target_data_arg (entry_stmt); |
7288 if (t == NULL) | 8314 if (t == NULL) |
7289 { | 8315 { |
7303 | 8329 |
7304 gimple *g; | 8330 gimple *g; |
7305 bool tagging = false; | 8331 bool tagging = false; |
7306 /* The maximum number used by any start_ix, without varargs. */ | 8332 /* The maximum number used by any start_ix, without varargs. */ |
7307 auto_vec<tree, 11> args; | 8333 auto_vec<tree, 11> args; |
7308 args.quick_push (device); | 8334 if (is_gimple_omp_oacc (entry_stmt)) |
8335 { | |
8336 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP, | |
8337 TREE_TYPE (goacc_flags), goacc_flags); | |
8338 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true, | |
8339 NULL_TREE, true, | |
8340 GSI_SAME_STMT); | |
8341 args.quick_push (goacc_flags_m); | |
8342 } | |
8343 else | |
8344 args.quick_push (device); | |
7309 if (offloaded) | 8345 if (offloaded) |
7310 args.quick_push (build_fold_addr_expr (child_fn)); | 8346 args.quick_push (build_fold_addr_expr (child_fn)); |
7311 args.quick_push (t1); | 8347 args.quick_push (t1); |
7312 args.quick_push (t2); | 8348 args.quick_push (t2); |
7313 args.quick_push (t3); | 8349 args.quick_push (t3); |
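A hedged sketch of the call eventually assembled from ARGS; these are the libgomp entry points the two builtins normally resolve to, with argument lists abbreviated:

    /* OpenACC compute constructs:
         GOACC_parallel_keyed (goacc_flags, child_fn, mapnum,
                               hostaddrs, sizes, kinds, ...);
       OpenMP target regions:
         GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes,
                          kinds, flags, depend, args);  */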
7330 args.quick_push (depend); | 8366 args.quick_push (depend); |
7331 if (start_ix == BUILT_IN_GOMP_TARGET) | 8367 if (start_ix == BUILT_IN_GOMP_TARGET) |
7332 args.quick_push (get_target_arguments (&gsi, entry_stmt)); | 8368 args.quick_push (get_target_arguments (&gsi, entry_stmt)); |
7333 break; | 8369 break; |
7334 case BUILT_IN_GOACC_PARALLEL: | 8370 case BUILT_IN_GOACC_PARALLEL: |
7335 oacc_set_fn_attrib (child_fn, clauses, &args); | 8371 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL) |
8372 { | |
8373 tree dims = NULL_TREE; | |
8374 unsigned int ix; | |
8375 | |
8376 /* For serial constructs we set all dimensions to 1. */ | |
8377 for (ix = GOMP_DIM_MAX; ix--;) | |
8378 dims = tree_cons (NULL_TREE, integer_one_node, dims); | |
8379 oacc_replace_fn_attrib (child_fn, dims); | |
8380 } | |
8381 else | |
8382 oacc_set_fn_attrib (child_fn, clauses, &args); | |
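    /* Hedged note: GOMP_DIM_MAX covers the three OpenACC partitioning
       levels (gang, worker, vector), so the serial path above pins the
       dims list to (1, 1, 1): one gang, one worker, one vector lane.  */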
7336 tagging = true; | 8383 tagging = true; |
7337 /* FALLTHRU */ | 8384 /* FALLTHRU */ |
7338 case BUILT_IN_GOACC_ENTER_EXIT_DATA: | 8385 case BUILT_IN_GOACC_ENTER_EXIT_DATA: |
7339 case BUILT_IN_GOACC_UPDATE: | 8386 case BUILT_IN_GOACC_UPDATE: |
7340 { | 8387 { |
7623 } | 8670 } |
7624 if (gpukernel->inner) | 8671 if (gpukernel->inner) |
7625 expand_omp (gpukernel->inner); | 8672 expand_omp (gpukernel->inner); |
7626 | 8673 |
7627 tree kern_fndecl = copy_node (orig_child_fndecl); | 8674 tree kern_fndecl = copy_node (orig_child_fndecl); |
7628 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); | 8675 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl, |
8676 "kernel"); | |
7629 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); | 8677 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); |
7630 tree tgtblock = gimple_block (tgt_stmt); | 8678 tree tgtblock = gimple_block (tgt_stmt); |
7631 tree fniniblock = make_node (BLOCK); | 8679 tree fniniblock = make_node (BLOCK); |
7632 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock); | 8680 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock); |
7633 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); | 8681 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); |
7636 DECL_INITIAL (kern_fndecl) = fniniblock; | 8684 DECL_INITIAL (kern_fndecl) = fniniblock; |
7637 push_struct_function (kern_fndecl); | 8685 push_struct_function (kern_fndecl); |
7638 cfun->function_end_locus = gimple_location (tgt_stmt); | 8686 cfun->function_end_locus = gimple_location (tgt_stmt); |
7639 init_tree_ssa (cfun); | 8687 init_tree_ssa (cfun); |
7640 pop_cfun (); | 8688 pop_cfun (); |
7641 | |
7642 /* Make sure to generate early debug for the function before | |
7643 outlining anything. */ | |
7644 if (! gimple_in_ssa_p (cfun)) | |
7645 (*debug_hooks->early_global_decl) (cfun->decl); | |
7646 | 8689 |
7647 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); | 8690 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); |
7648 gcc_assert (!DECL_CHAIN (old_parm_decl)); | 8691 gcc_assert (!DECL_CHAIN (old_parm_decl)); |
7649 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); | 8692 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); |
7650 DECL_CONTEXT (new_parm_decl) = kern_fndecl; | 8693 DECL_CONTEXT (new_parm_decl) = kern_fndecl; |
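    /* Hedged note: clone_function_name_numbered appends a uniquified
       numbered suffix (e.g. ".kernel.0"; example name only), so repeated
       grid kernels outlined from the same source cannot collide on
       assembler names.  */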
7895 { | 8938 { |
7896 case GF_OMP_TARGET_KIND_REGION: | 8939 case GF_OMP_TARGET_KIND_REGION: |
7897 case GF_OMP_TARGET_KIND_DATA: | 8940 case GF_OMP_TARGET_KIND_DATA: |
7898 case GF_OMP_TARGET_KIND_OACC_PARALLEL: | 8941 case GF_OMP_TARGET_KIND_OACC_PARALLEL: |
7899 case GF_OMP_TARGET_KIND_OACC_KERNELS: | 8942 case GF_OMP_TARGET_KIND_OACC_KERNELS: |
8943 case GF_OMP_TARGET_KIND_OACC_SERIAL: | |
7900 case GF_OMP_TARGET_KIND_OACC_DATA: | 8944 case GF_OMP_TARGET_KIND_OACC_DATA: |
7901 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: | 8945 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: |
7902 break; | 8946 break; |
7903 case GF_OMP_TARGET_KIND_UPDATE: | 8947 case GF_OMP_TARGET_KIND_UPDATE: |
7904 case GF_OMP_TARGET_KIND_ENTER_DATA: | 8948 case GF_OMP_TARGET_KIND_ENTER_DATA: |
7918 (as_a <gomp_ordered *> (stmt)), | 8962 (as_a <gomp_ordered *> (stmt)), |
7919 OMP_CLAUSE_DEPEND)) | 8963 OMP_CLAUSE_DEPEND)) |
7920 /* #pragma omp ordered depend is also just a stand-alone | 8964 /* #pragma omp ordered depend is also just a stand-alone |
7921 directive. */ | 8965 directive. */ |
7922 region = NULL; | 8966 region = NULL; |
8967 else if (code == GIMPLE_OMP_TASK | |
8968 && gimple_omp_task_taskwait_p (stmt)) | |
8969 /* #pragma omp taskwait depend(...) is a stand-alone directive. */ | |
8970 region = NULL; | |
7923 /* ..., this directive becomes the parent for a new region. */ | 8971 /* ..., this directive becomes the parent for a new region. */ |
7924 if (region) | 8972 if (region) |
7925 parent = region; | 8973 parent = region; |
7926 } | 8974 } |
7927 } | 8975 } |
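A hedged source-level example of one of the stand-alone forms excluded above; it opens no region for later statements to nest in (names hypothetical):

    void
    producer (int *x)
    {
      #pragma omp task depend(out: x[0])
      x[0] = 1;

      /* Stand-alone: waits on the dependence, creates no region.  */
      #pragma omp taskwait depend(in: x[0])
    }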
8108 bool fallthru = false; | 9156 bool fallthru = false; |
8109 | 9157 |
8110 switch (code) | 9158 switch (code) |
8111 { | 9159 { |
8112 case GIMPLE_OMP_PARALLEL: | 9160 case GIMPLE_OMP_PARALLEL: |
8113 case GIMPLE_OMP_TASK: | |
8114 case GIMPLE_OMP_FOR: | 9161 case GIMPLE_OMP_FOR: |
8115 case GIMPLE_OMP_SINGLE: | 9162 case GIMPLE_OMP_SINGLE: |
8116 case GIMPLE_OMP_TEAMS: | 9163 case GIMPLE_OMP_TEAMS: |
8117 case GIMPLE_OMP_MASTER: | 9164 case GIMPLE_OMP_MASTER: |
8118 case GIMPLE_OMP_TASKGROUP: | 9165 case GIMPLE_OMP_TASKGROUP: |
8121 case GIMPLE_OMP_GRID_BODY: | 9168 case GIMPLE_OMP_GRID_BODY: |
8122 cur_region = new_omp_region (bb, code, cur_region); | 9169 cur_region = new_omp_region (bb, code, cur_region); |
8123 fallthru = true; | 9170 fallthru = true; |
8124 break; | 9171 break; |
8125 | 9172 |
9173 case GIMPLE_OMP_TASK: | |
9174 cur_region = new_omp_region (bb, code, cur_region); | |
9175 fallthru = true; | |
9176 if (gimple_omp_task_taskwait_p (last)) | |
9177 cur_region = cur_region->outer; | |
9178 break; | |
9179 | |
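    /* Hedged note: a depend-only taskwait is represented as a bodyless
       GIMPLE_OMP_TASK, so the region pushed here is popped at once and
       no matching GIMPLE_OMP_RETURN is expected to close it.  */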
8126 case GIMPLE_OMP_ORDERED: | 9180 case GIMPLE_OMP_ORDERED: |
8127 cur_region = new_omp_region (bb, code, cur_region); | 9181 cur_region = new_omp_region (bb, code, cur_region); |
8128 fallthru = true; | 9182 fallthru = true; |
8129 if (omp_find_clause (gimple_omp_ordered_clauses | 9183 if (omp_find_clause (gimple_omp_ordered_clauses |
8130 (as_a <gomp_ordered *> (last)), | 9184 (as_a <gomp_ordered *> (last)), |
8139 { | 9193 { |
8140 case GF_OMP_TARGET_KIND_REGION: | 9194 case GF_OMP_TARGET_KIND_REGION: |
8141 case GF_OMP_TARGET_KIND_DATA: | 9195 case GF_OMP_TARGET_KIND_DATA: |
8142 case GF_OMP_TARGET_KIND_OACC_PARALLEL: | 9196 case GF_OMP_TARGET_KIND_OACC_PARALLEL: |
8143 case GF_OMP_TARGET_KIND_OACC_KERNELS: | 9197 case GF_OMP_TARGET_KIND_OACC_KERNELS: |
9198 case GF_OMP_TARGET_KIND_OACC_SERIAL: | |
8144 case GF_OMP_TARGET_KIND_OACC_DATA: | 9199 case GF_OMP_TARGET_KIND_OACC_DATA: |
8145 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: | 9200 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: |
8146 break; | 9201 break; |
8147 case GF_OMP_TARGET_KIND_UPDATE: | 9202 case GF_OMP_TARGET_KIND_UPDATE: |
8148 case GF_OMP_TARGET_KIND_ENTER_DATA: | 9203 case GF_OMP_TARGET_KIND_ENTER_DATA: |