comparison gcc/tree-ssa-math-opts.c @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
comparison: 15:561a7518be6b vs. 16:04ced10e8804
1 /* Global, SSA-based optimizations using mathematical identities. 1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Free Software Foundation, Inc.
4 3
5 This file is part of GCC. 4 This file is part of GCC.
6 5
7 GCC is free software; you can redistribute it and/or modify it 6 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 7 under the terms of the GNU General Public License as published by the
41 already has one. However, this cannot be done as an extension of 40 already has one. However, this cannot be done as an extension of
42 PRE for several reasons. 41 PRE for several reasons.
43 42
44 First of all, with some experiments it was found that the 43 First of all, with some experiments it was found that the
45 transformation is not always useful if there are only two divisions 44 transformation is not always useful if there are only two divisions
46 hy the same divisor. This is probably because modern processors 45 by the same divisor. This is probably because modern processors
47 can pipeline the divisions; on older, in-order processors it should 46 can pipeline the divisions; on older, in-order processors it should
48 still be effective to optimize two divisions by the same number. 47 still be effective to optimize two divisions by the same number.
49 We make this a param, and it shall be called N in the remainder of 48 We make this a param, and it shall be called N in the remainder of
50 this comment. 49 this comment.
51 50
86 variables in a single pass. */ 85 variables in a single pass. */
87 86
88 #include "config.h" 87 #include "config.h"
89 #include "system.h" 88 #include "system.h"
90 #include "coretypes.h" 89 #include "coretypes.h"
91 #include "tm.h" 90 #include "backend.h"
92 #include "flags.h" 91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h" 93 #include "tree.h"
94 #include "tree-flow.h" 94 #include "gimple.h"
95 #include "timevar.h" 95 #include "predict.h"
96 #include "alloc-pool.h"
96 #include "tree-pass.h" 97 #include "tree-pass.h"
97 #include "alloc-pool.h" 98 #include "ssa.h"
98 #include "basic-block.h" 99 #include "optabs-tree.h"
99 #include "target.h"
100 #include "gimple-pretty-print.h" 100 #include "gimple-pretty-print.h"
101 101 #include "alias.h"
102 /* FIXME: RTL headers have to be included here for optabs. */ 102 #include "fold-const.h"
103 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ 103 #include "gimple-fold.h"
104 #include "expr.h" /* Because optabs.h wants sepops. */ 104 #include "gimple-iterator.h"
105 #include "optabs.h" 105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
106 118
107 /* This structure represents one basic block that either computes a 119 /* This structure represents one basic block that either computes a
108 division, or is a common dominator for basic blocks that compute a 120 division, or is a common dominator for basic blocks that compute a
109 division. */ 121 division. */
110 struct occurrence { 122 struct occurrence {
115 inserted in BB. */ 127 inserted in BB. */
116 tree recip_def; 128 tree recip_def;
117 129
118 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that 130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
119 was inserted in BB. */ 131 was inserted in BB. */
120 gimple recip_def_stmt; 132 gimple *recip_def_stmt;
121 133
122 /* Pointer to a list of "struct occurrence"s for blocks dominated 134 /* Pointer to a list of "struct occurrence"s for blocks dominated
123 by BB. */ 135 by BB. */
124 struct occurrence *children; 136 struct occurrence *children;
125 137
136 dominator for basic blocks that do. If it is false and trapping 148 dominator for basic blocks that do. If it is false and trapping
137 math is active, BB is not a candidate for inserting a reciprocal. */ 149 math is active, BB is not a candidate for inserting a reciprocal. */
138 bool bb_has_division; 150 bool bb_has_division;
139 }; 151 };
140 152
153 static struct
154 {
155 /* Number of 1.0/X ops inserted. */
156 int rdivs_inserted;
157
158 /* Number of 1.0/FUNC ops inserted. */
159 int rfuncs_inserted;
160 } reciprocal_stats;
161
162 static struct
163 {
164 /* Number of cexpi calls inserted. */
165 int inserted;
166 } sincos_stats;
167
168 static struct
169 {
170 /* Number of hand-written 16-bit nop / bswaps found. */
171 int found_16bit;
172
173 /* Number of hand-written 32-bit nop / bswaps found. */
174 int found_32bit;
175
176 /* Number of hand-written 64-bit nop / bswaps found. */
177 int found_64bit;
178 } nop_stats, bswap_stats;
179
180 static struct
181 {
182 /* Number of widening multiplication ops inserted. */
183 int widen_mults_inserted;
184
185 /* Number of integer multiply-and-accumulate ops inserted. */
186 int maccs_inserted;
187
188 /* Number of fp fused multiply-add ops inserted. */
189 int fmas_inserted;
190
191 /* Number of divmod calls inserted. */
192 int divmod_calls_inserted;
193 } widen_mul_stats;
141 194
142 /* The instance of "struct occurrence" representing the highest 195 /* The instance of "struct occurrence" representing the highest
143 interesting block in the dominator tree. */ 196 interesting block in the dominator tree. */
144 static struct occurrence *occ_head; 197 static struct occurrence *occ_head;
145 198
146 /* Allocation pool for getting instances of "struct occurrence". */ 199 /* Allocation pool for getting instances of "struct occurrence". */
147 static alloc_pool occ_pool; 200 static object_allocator<occurrence> *occ_pool;
148 201
149 202
150 203
151 /* Allocate and return a new struct occurrence for basic block BB, 204 /* Allocate and return a new struct occurrence for basic block BB,
152 whose children list is headed by CHILDREN. */ 205 whose children list is headed by CHILDREN. */
153 static struct occurrence * 206 static struct occurrence *
154 occ_new (basic_block bb, struct occurrence *children) 207 occ_new (basic_block bb, struct occurrence *children)
155 { 208 {
156 struct occurrence *occ; 209 struct occurrence *occ;
157 210
158 bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); 211 bb->aux = occ = occ_pool->allocate ();
159 memset (occ, 0, sizeof (struct occurrence)); 212 memset (occ, 0, sizeof (struct occurrence));
160 213
161 occ->bb = bb; 214 occ->bb = bb;
162 occ->children = children; 215 occ->children = children;
163 return occ; 216 return occ;
238 291
239 occ = (struct occurrence *) bb->aux; 292 occ = (struct occurrence *) bb->aux;
240 if (!occ) 293 if (!occ)
241 { 294 {
242 occ = occ_new (bb, NULL); 295 occ = occ_new (bb, NULL);
243 insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head); 296 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
244 } 297 }
245 298
246 occ->bb_has_division = true; 299 occ->bb_has_division = true;
247 occ->num_divisions++; 300 occ->num_divisions++;
248 } 301 }
274 } 327 }
275 328
276 329
277 /* Return whether USE_STMT is a floating-point division by DEF. */ 330 /* Return whether USE_STMT is a floating-point division by DEF. */
278 static inline bool 331 static inline bool
279 is_division_by (gimple use_stmt, tree def) 332 is_division_by (gimple *use_stmt, tree def)
280 { 333 {
281 return is_gimple_assign (use_stmt) 334 return is_gimple_assign (use_stmt)
282 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR 335 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
283 && gimple_assign_rhs2 (use_stmt) == def 336 && gimple_assign_rhs2 (use_stmt) == def
284 /* Do not recognize x / x as valid division, as we are getting 337 /* Do not recognize x / x as valid division, as we are getting
299 static void 352 static void
300 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, 353 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
301 tree def, tree recip_def, int threshold) 354 tree def, tree recip_def, int threshold)
302 { 355 {
303 tree type; 356 tree type;
304 gimple new_stmt; 357 gassign *new_stmt;
305 gimple_stmt_iterator gsi; 358 gimple_stmt_iterator gsi;
306 struct occurrence *occ_child; 359 struct occurrence *occ_child;
307 360
308 if (!recip_def 361 if (!recip_def
309 && (occ->bb_has_division || !flag_trapping_math) 362 && (occ->bb_has_division || !flag_trapping_math)
310 && occ->num_divisions >= threshold) 363 && occ->num_divisions >= threshold)
311 { 364 {
312 /* Make a variable with the replacement and substitute it. */ 365 /* Make a variable with the replacement and substitute it. */
313 type = TREE_TYPE (def); 366 type = TREE_TYPE (def);
314 recip_def = make_rename_temp (type, "reciptmp"); 367 recip_def = create_tmp_reg (type, "reciptmp");
315 new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def, 368 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
316 build_one_cst (type), def); 369 build_one_cst (type), def);
317 370
318 if (occ->bb_has_division) 371 if (occ->bb_has_division)
319 { 372 {
320 /* Case 1: insert before an existing division. */ 373 /* Case 1: insert before an existing division. */
321 gsi = gsi_after_labels (occ->bb); 374 gsi = gsi_after_labels (occ->bb);
337 /* Case 3: insert in a basic block not containing defs/uses. */ 390 /* Case 3: insert in a basic block not containing defs/uses. */
338 gsi = gsi_after_labels (occ->bb); 391 gsi = gsi_after_labels (occ->bb);
339 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); 392 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
340 } 393 }
341 394
395 reciprocal_stats.rdivs_inserted++;
396
342 occ->recip_def_stmt = new_stmt; 397 occ->recip_def_stmt = new_stmt;
343 } 398 }
344 399
345 occ->recip_def = recip_def; 400 occ->recip_def = recip_def;
346 for (occ_child = occ->children; occ_child; occ_child = occ_child->next) 401 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
352 possible. */ 407 possible. */
353 408
354 static inline void 409 static inline void
355 replace_reciprocal (use_operand_p use_p) 410 replace_reciprocal (use_operand_p use_p)
356 { 411 {
357 gimple use_stmt = USE_STMT (use_p); 412 gimple *use_stmt = USE_STMT (use_p);
358 basic_block bb = gimple_bb (use_stmt); 413 basic_block bb = gimple_bb (use_stmt);
359 struct occurrence *occ = (struct occurrence *) bb->aux; 414 struct occurrence *occ = (struct occurrence *) bb->aux;
360 415
361 if (optimize_bb_for_speed_p (bb) 416 if (optimize_bb_for_speed_p (bb)
362 && occ->recip_def && use_stmt != occ->recip_def_stmt) 417 && occ->recip_def && use_stmt != occ->recip_def_stmt)
363 { 418 {
419 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
364 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); 420 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
365 SET_USE (use_p, occ->recip_def); 421 SET_USE (use_p, occ->recip_def);
366 fold_stmt_inplace (use_stmt); 422 fold_stmt_inplace (&gsi);
367 update_stmt (use_stmt); 423 update_stmt (use_stmt);
368 } 424 }
369 } 425 }
370 426
371 427
378 434
379 /* First get the two pointers hanging off OCC. */ 435 /* First get the two pointers hanging off OCC. */
380 next = occ->next; 436 next = occ->next;
381 child = occ->children; 437 child = occ->children;
382 occ->bb->aux = NULL; 438 occ->bb->aux = NULL;
383 pool_free (occ_pool, occ); 439 occ_pool->remove (occ);
384 440
385 /* Now ensure that we don't recurse unless it is necessary. */ 441 /* Now ensure that we don't recurse unless it is necessary. */
386 if (!child) 442 if (!child)
387 return next; 443 return next;
388 else 444 else
411 467
412 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); 468 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
413 469
414 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) 470 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
415 { 471 {
416 gimple use_stmt = USE_STMT (use_p); 472 gimple *use_stmt = USE_STMT (use_p);
417 if (is_division_by (use_stmt, def)) 473 if (is_division_by (use_stmt, def))
418 { 474 {
419 register_division_in (gimple_bb (use_stmt)); 475 register_division_in (gimple_bb (use_stmt));
420 count++; 476 count++;
421 } 477 }
423 479
424 /* Do the expensive part only if we can hope to optimize something. */ 480 /* Do the expensive part only if we can hope to optimize something. */
425 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); 481 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
426 if (count >= threshold) 482 if (count >= threshold)
427 { 483 {
428 gimple use_stmt; 484 gimple *use_stmt;
429 for (occ = occ_head; occ; occ = occ->next) 485 for (occ = occ_head; occ; occ = occ->next)
430 { 486 {
431 compute_merit (occ); 487 compute_merit (occ);
432 insert_reciprocals (def_gsi, occ, def, NULL, threshold); 488 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
433 } 489 }
446 occ = free_bb (occ); 502 occ = free_bb (occ);
447 503
448 occ_head = NULL; 504 occ_head = NULL;
449 } 505 }
450 506
451 static bool 507 /* Return an internal function that implements the reciprocal of CALL,
452 gate_cse_reciprocals (void) 508 or IFN_LAST if there is no such function that the target supports. */
453 { 509
454 return optimize && flag_reciprocal_math; 510 internal_fn
511 internal_fn_reciprocal (gcall *call)
512 {
513 internal_fn ifn;
514
515 switch (gimple_call_combined_fn (call))
516 {
517 CASE_CFN_SQRT:
518 ifn = IFN_RSQRT;
519 break;
520
521 default:
522 return IFN_LAST;
523 }
524
525 tree_pair types = direct_internal_fn_types (ifn, call);
526 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
527 return IFN_LAST;
528
529 return ifn;
455 } 530 }
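
A user-level sketch of what the a/func(b) rewrite below achieves when the
target supports a reciprocal square root (rsqrt here is a hypothetical
stand-in for IFN_RSQRT or the builtin returned by targetm.builtin_reciprocal):

    #include <math.h>

    /* Hypothetical reciprocal function, for illustration only.  */
    static double rsqrt (double x) { return 1.0 / sqrt (x); }

    double g (double a, double b)
    {
      /* t = sqrt (b); return a / t;  becomes:  */
      return a * rsqrt (b);
    }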
456 531
457 /* Go through all the floating-point SSA_NAMEs, and call 532 /* Go through all the floating-point SSA_NAMEs, and call
458 execute_cse_reciprocals_1 on each of them. */ 533 execute_cse_reciprocals_1 on each of them. */
459 static unsigned int 534 namespace {
460 execute_cse_reciprocals (void) 535
536 const pass_data pass_data_cse_reciprocals =
537 {
538 GIMPLE_PASS, /* type */
539 "recip", /* name */
540 OPTGROUP_NONE, /* optinfo_flags */
541 TV_NONE, /* tv_id */
542 PROP_ssa, /* properties_required */
543 0, /* properties_provided */
544 0, /* properties_destroyed */
545 0, /* todo_flags_start */
546 TODO_update_ssa, /* todo_flags_finish */
547 };
548
549 class pass_cse_reciprocals : public gimple_opt_pass
550 {
551 public:
552 pass_cse_reciprocals (gcc::context *ctxt)
553 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
554 {}
555
556 /* opt_pass methods: */
557 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
558 virtual unsigned int execute (function *);
559
560 }; // class pass_cse_reciprocals
561
562 unsigned int
563 pass_cse_reciprocals::execute (function *fun)
461 { 564 {
462 basic_block bb; 565 basic_block bb;
463 tree arg; 566 tree arg;
464 567
465 occ_pool = create_alloc_pool ("dominators for recip", 568 occ_pool = new object_allocator<occurrence> ("dominators for recip");
466 sizeof (struct occurrence), 569
467 n_basic_blocks / 3 + 1); 570 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
468
469 calculate_dominance_info (CDI_DOMINATORS); 571 calculate_dominance_info (CDI_DOMINATORS);
470 calculate_dominance_info (CDI_POST_DOMINATORS); 572 calculate_dominance_info (CDI_POST_DOMINATORS);
471 573
472 #ifdef ENABLE_CHECKING 574 if (flag_checking)
473 FOR_EACH_BB (bb) 575 FOR_EACH_BB_FN (bb, fun)
474 gcc_assert (!bb->aux); 576 gcc_assert (!bb->aux);
475 #endif 577
476 578 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
477 for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg)) 579 if (FLOAT_TYPE_P (TREE_TYPE (arg))
478 if (gimple_default_def (cfun, arg)
479 && FLOAT_TYPE_P (TREE_TYPE (arg))
480 && is_gimple_reg (arg)) 580 && is_gimple_reg (arg))
481 execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg)); 581 {
482 582 tree name = ssa_default_def (fun, arg);
483 FOR_EACH_BB (bb) 583 if (name)
484 { 584 execute_cse_reciprocals_1 (NULL, name);
485 gimple_stmt_iterator gsi; 585 }
486 gimple phi; 586
587 FOR_EACH_BB_FN (bb, fun)
588 {
487 tree def; 589 tree def;
488 590
489 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 591 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
490 { 592 gsi_next (&gsi))
491 phi = gsi_stmt (gsi); 593 {
594 gphi *phi = gsi.phi ();
492 def = PHI_RESULT (phi); 595 def = PHI_RESULT (phi);
493 if (FLOAT_TYPE_P (TREE_TYPE (def)) 596 if (! virtual_operand_p (def)
494 && is_gimple_reg (def)) 597 && FLOAT_TYPE_P (TREE_TYPE (def)))
495 execute_cse_reciprocals_1 (NULL, def); 598 execute_cse_reciprocals_1 (NULL, def);
496 } 599 }
497 600
498 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 601 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
602 gsi_next (&gsi))
499 { 603 {
500 gimple stmt = gsi_stmt (gsi); 604 gimple *stmt = gsi_stmt (gsi);
501 605
502 if (gimple_has_lhs (stmt) 606 if (gimple_has_lhs (stmt)
503 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL 607 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
504 && FLOAT_TYPE_P (TREE_TYPE (def)) 608 && FLOAT_TYPE_P (TREE_TYPE (def))
505 && TREE_CODE (def) == SSA_NAME) 609 && TREE_CODE (def) == SSA_NAME)
508 612
509 if (optimize_bb_for_size_p (bb)) 613 if (optimize_bb_for_size_p (bb))
510 continue; 614 continue;
511 615
512 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ 616 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
513 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 617 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
618 gsi_next (&gsi))
514 { 619 {
515 gimple stmt = gsi_stmt (gsi); 620 gimple *stmt = gsi_stmt (gsi);
516 tree fndecl;
517 621
518 if (is_gimple_assign (stmt) 622 if (is_gimple_assign (stmt)
519 && gimple_assign_rhs_code (stmt) == RDIV_EXPR) 623 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
520 { 624 {
521 tree arg1 = gimple_assign_rhs2 (stmt); 625 tree arg1 = gimple_assign_rhs2 (stmt);
522 gimple stmt1; 626 gimple *stmt1;
523 627
524 if (TREE_CODE (arg1) != SSA_NAME) 628 if (TREE_CODE (arg1) != SSA_NAME)
525 continue; 629 continue;
526 630
527 stmt1 = SSA_NAME_DEF_STMT (arg1); 631 stmt1 = SSA_NAME_DEF_STMT (arg1);
528 632
529 if (is_gimple_call (stmt1) 633 if (is_gimple_call (stmt1)
530 && gimple_call_lhs (stmt1) 634 && gimple_call_lhs (stmt1))
531 && (fndecl = gimple_call_fndecl (stmt1))
532 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
533 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
534 { 635 {
535 enum built_in_function code; 636 bool fail;
536 bool md_code, fail;
537 imm_use_iterator ui; 637 imm_use_iterator ui;
538 use_operand_p use_p; 638 use_operand_p use_p;
539 639 tree fndecl = NULL_TREE;
540 code = DECL_FUNCTION_CODE (fndecl); 640
541 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; 641 gcall *call = as_a <gcall *> (stmt1);
542 642 internal_fn ifn = internal_fn_reciprocal (call);
543 fndecl = targetm.builtin_reciprocal (code, md_code, false); 643 if (ifn == IFN_LAST)
544 if (!fndecl) 644 {
545 continue; 645 fndecl = gimple_call_fndecl (call);
646 if (!fndecl
647 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
648 continue;
649 fndecl = targetm.builtin_reciprocal (fndecl);
650 if (!fndecl)
651 continue;
652 }
546 653
547 /* Check that all uses of the SSA name are divisions, 654 /* Check that all uses of the SSA name are divisions,
548 otherwise replacing the defining statement will do 655 otherwise replacing the defining statement will do
549 the wrong thing. */ 656 the wrong thing. */
550 fail = false; 657 fail = false;
551 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) 658 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
552 { 659 {
553 gimple stmt2 = USE_STMT (use_p); 660 gimple *stmt2 = USE_STMT (use_p);
554 if (is_gimple_debug (stmt2)) 661 if (is_gimple_debug (stmt2))
555 continue; 662 continue;
556 if (!is_gimple_assign (stmt2) 663 if (!is_gimple_assign (stmt2)
557 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR 664 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
558 || gimple_assign_rhs1 (stmt2) == arg1 665 || gimple_assign_rhs1 (stmt2) == arg1
563 } 670 }
564 } 671 }
565 if (fail) 672 if (fail)
566 continue; 673 continue;
567 674
568 gimple_replace_lhs (stmt1, arg1); 675 gimple_replace_ssa_lhs (call, arg1);
569 gimple_call_set_fndecl (stmt1, fndecl); 676 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
570 update_stmt (stmt1); 677 {
678 auto_vec<tree, 4> args;
679 for (unsigned int i = 0;
680 i < gimple_call_num_args (call); i++)
681 args.safe_push (gimple_call_arg (call, i));
682 gcall *stmt2;
683 if (ifn == IFN_LAST)
684 stmt2 = gimple_build_call_vec (fndecl, args);
685 else
686 stmt2 = gimple_build_call_internal_vec (ifn, args);
687 gimple_call_set_lhs (stmt2, arg1);
688 if (gimple_vdef (call))
689 {
690 gimple_set_vdef (stmt2, gimple_vdef (call));
691 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
692 }
693 gimple_call_set_nothrow (stmt2,
694 gimple_call_nothrow_p (call));
695 gimple_set_vuse (stmt2, gimple_vuse (call));
696 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
697 gsi_replace (&gsi2, stmt2, true);
698 }
699 else
700 {
701 if (ifn == IFN_LAST)
702 gimple_call_set_fndecl (call, fndecl);
703 else
704 gimple_call_set_internal_fn (call, ifn);
705 update_stmt (call);
706 }
707 reciprocal_stats.rfuncs_inserted++;
571 708
572 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) 709 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
573 { 710 {
711 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
574 gimple_assign_set_rhs_code (stmt, MULT_EXPR); 712 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
575 fold_stmt_inplace (stmt); 713 fold_stmt_inplace (&gsi);
576 update_stmt (stmt); 714 update_stmt (stmt);
577 } 715 }
578 } 716 }
579 } 717 }
580 } 718 }
581 } 719 }
582 720
721 statistics_counter_event (fun, "reciprocal divs inserted",
722 reciprocal_stats.rdivs_inserted);
723 statistics_counter_event (fun, "reciprocal functions inserted",
724 reciprocal_stats.rfuncs_inserted);
725
583 free_dominance_info (CDI_DOMINATORS); 726 free_dominance_info (CDI_DOMINATORS);
584 free_dominance_info (CDI_POST_DOMINATORS); 727 free_dominance_info (CDI_POST_DOMINATORS);
585 free_alloc_pool (occ_pool); 728 delete occ_pool;
586 return 0; 729 return 0;
587 } 730 }
588 731
589 struct gimple_opt_pass pass_cse_reciprocals = 732 } // anon namespace
590 { 733
591 { 734 gimple_opt_pass *
592 GIMPLE_PASS, 735 make_pass_cse_reciprocals (gcc::context *ctxt)
593 "recip", /* name */ 736 {
594 gate_cse_reciprocals, /* gate */ 737 return new pass_cse_reciprocals (ctxt);
595 execute_cse_reciprocals, /* execute */ 738 }
596 NULL, /* sub */
597 NULL, /* next */
598 0, /* static_pass_number */
599 TV_NONE, /* tv_id */
600 PROP_ssa, /* properties_required */
601 0, /* properties_provided */
602 0, /* properties_destroyed */
603 0, /* todo_flags_start */
604 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
605 | TODO_verify_stmts /* todo_flags_finish */
606 }
607 };
608 739
609 /* Records an occurrence at statement USE_STMT in the vector of trees 740 /* Records an occurrence at statement USE_STMT in the vector of trees
610 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is 741 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is
611 not yet initialized. Returns true if the occurrence was pushed on 742 not yet initialized. Returns true if the occurrence was pushed on
612 the vector. Adjusts *TOP_BB to be the basic block dominating all 743 the vector. Adjusts *TOP_BB to be the basic block dominating all
613 statements in the vector. */ 744 statements in the vector. */
614 745
615 static bool 746 static bool
616 maybe_record_sincos (VEC(gimple, heap) **stmts, 747 maybe_record_sincos (vec<gimple *> *stmts,
617 basic_block *top_bb, gimple use_stmt) 748 basic_block *top_bb, gimple *use_stmt)
618 { 749 {
619 basic_block use_bb = gimple_bb (use_stmt); 750 basic_block use_bb = gimple_bb (use_stmt);
620 if (*top_bb 751 if (*top_bb
621 && (*top_bb == use_bb 752 && (*top_bb == use_bb
622 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) 753 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
623 VEC_safe_push (gimple, heap, *stmts, use_stmt); 754 stmts->safe_push (use_stmt);
624 else if (!*top_bb 755 else if (!*top_bb
625 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) 756 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
626 { 757 {
627 VEC_safe_push (gimple, heap, *stmts, use_stmt); 758 stmts->safe_push (use_stmt);
628 *top_bb = use_bb; 759 *top_bb = use_bb;
629 } 760 }
630 else 761 else
631 return false; 762 return false;
632 763
645 execute_cse_sincos_1 (tree name) 776 execute_cse_sincos_1 (tree name)
646 { 777 {
647 gimple_stmt_iterator gsi; 778 gimple_stmt_iterator gsi;
648 imm_use_iterator use_iter; 779 imm_use_iterator use_iter;
649 tree fndecl, res, type; 780 tree fndecl, res, type;
650 gimple def_stmt, use_stmt, stmt; 781 gimple *def_stmt, *use_stmt, *stmt;
651 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; 782 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
652 VEC(gimple, heap) *stmts = NULL; 783 auto_vec<gimple *> stmts;
653 basic_block top_bb = NULL; 784 basic_block top_bb = NULL;
654 int i; 785 int i;
655 bool cfg_changed = false; 786 bool cfg_changed = false;
656 787
657 type = TREE_TYPE (name); 788 type = TREE_TYPE (name);
658 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) 789 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
659 { 790 {
660 if (gimple_code (use_stmt) != GIMPLE_CALL 791 if (gimple_code (use_stmt) != GIMPLE_CALL
661 || !gimple_call_lhs (use_stmt) 792 || !gimple_call_lhs (use_stmt))
662 || !(fndecl = gimple_call_fndecl (use_stmt))
663 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
664 continue; 793 continue;
665 794
666 switch (DECL_FUNCTION_CODE (fndecl)) 795 switch (gimple_call_combined_fn (use_stmt))
667 { 796 {
668 CASE_FLT_FN (BUILT_IN_COS): 797 CASE_CFN_COS:
669 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; 798 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
670 break; 799 break;
671 800
672 CASE_FLT_FN (BUILT_IN_SIN): 801 CASE_CFN_SIN:
673 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; 802 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
674 break; 803 break;
675 804
676 CASE_FLT_FN (BUILT_IN_CEXPI): 805 CASE_CFN_CEXPI:
677 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; 806 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
678 break; 807 break;
679 808
680 default:; 809 default:;
681 } 810 }
682 } 811 }
683 812
684 if (seen_cos + seen_sin + seen_cexpi <= 1) 813 if (seen_cos + seen_sin + seen_cexpi <= 1)
685 { 814 return false;
686 VEC_free(gimple, heap, stmts);
687 return false;
688 }
689 815
690 /* Simply insert cexpi at the beginning of top_bb but not earlier than 816 /* Simply insert cexpi at the beginning of top_bb but not earlier than
691 the name def statement. */ 817 the name def statement. */
692 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI); 818 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
693 if (!fndecl) 819 if (!fndecl)
694 return false; 820 return false;
695 res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
696 stmt = gimple_build_call (fndecl, 1, name); 821 stmt = gimple_build_call (fndecl, 1, name);
697 res = make_ssa_name (res, stmt); 822 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
698 gimple_call_set_lhs (stmt, res); 823 gimple_call_set_lhs (stmt, res);
699 824
700 def_stmt = SSA_NAME_DEF_STMT (name); 825 def_stmt = SSA_NAME_DEF_STMT (name);
701 if (!SSA_NAME_IS_DEFAULT_DEF (name) 826 if (!SSA_NAME_IS_DEFAULT_DEF (name)
702 && gimple_code (def_stmt) != GIMPLE_PHI 827 && gimple_code (def_stmt) != GIMPLE_PHI
708 else 833 else
709 { 834 {
710 gsi = gsi_after_labels (top_bb); 835 gsi = gsi_after_labels (top_bb);
711 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 836 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
712 } 837 }
713 update_stmt (stmt); 838 sincos_stats.inserted++;
714 839
715 /* And adjust the recorded old call sites. */ 840 /* And adjust the recorded old call sites. */
716 for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i) 841 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
717 { 842 {
718 tree rhs = NULL; 843 tree rhs = NULL;
719 fndecl = gimple_call_fndecl (use_stmt); 844
720 845 switch (gimple_call_combined_fn (use_stmt))
721 switch (DECL_FUNCTION_CODE (fndecl)) 846 {
722 { 847 CASE_CFN_COS:
723 CASE_FLT_FN (BUILT_IN_COS):
724 rhs = fold_build1 (REALPART_EXPR, type, res); 848 rhs = fold_build1 (REALPART_EXPR, type, res);
725 break; 849 break;
726 850
727 CASE_FLT_FN (BUILT_IN_SIN): 851 CASE_CFN_SIN:
728 rhs = fold_build1 (IMAGPART_EXPR, type, res); 852 rhs = fold_build1 (IMAGPART_EXPR, type, res);
729 break; 853 break;
730 854
731 CASE_FLT_FN (BUILT_IN_CEXPI): 855 CASE_CFN_CEXPI:
732 rhs = res; 856 rhs = res;
733 break; 857 break;
734 858
735 default:; 859 default:;
736 gcc_unreachable (); 860 gcc_unreachable ();
743 gsi_replace (&gsi, stmt, true); 867 gsi_replace (&gsi, stmt, true);
744 if (gimple_purge_dead_eh_edges (gimple_bb (stmt))) 868 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
745 cfg_changed = true; 869 cfg_changed = true;
746 } 870 }
747 871
748 VEC_free(gimple, heap, stmts);
749
750 return cfg_changed; 872 return cfg_changed;
751 } 873 }
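
A source-level sketch of the rewrite performed by execute_cse_sincos_1 above
(cexpi(x) computes cos(x) + i*sin(x); cexp (I * x) stands in here for the
internal __builtin_cexpi):

    #include <math.h>
    #include <complex.h>

    /* Before: two libm calls on the same argument.  */
    void before (double x, double *s, double *c)
    {
      *s = sin (x);
      *c = cos (x);
    }

    /* After, conceptually: one combined call feeding both results.  */
    void after (double x, double *s, double *c)
    {
      double complex t = cexp (I * x);
      *c = creal (t);
      *s = cimag (t);
    }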
752 874
875 /* To evaluate powi(x,n), the floating point value x raised to the
876 constant integer exponent n, we use a hybrid algorithm that
877 combines the "window method" with look-up tables. For an
878 introduction to exponentiation algorithms and "addition chains",
879 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
880 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
881 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
882 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
883
884 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
885 multiplications to inline before calling the system library's pow
886 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
887 so this default never requires calling pow, powf or powl. */
888
889 #ifndef POWI_MAX_MULTS
890 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
891 #endif
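
/* (For reference: plain binary exponentiation needs one squaring per bit
   of n after the leading one, plus one multiply per remaining set bit,
   i.e. at most 2*bits(n) - 2 multiplies, which is where the bound above
   comes from; the window method below only reduces that count.)  */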
892
893 /* The size of the "optimal power tree" lookup table. All
894 exponents less than this value are simply looked up in the
895 powi_table below. This threshold is also used to size the
896 cache of pseudo registers that hold intermediate results. */
897 #define POWI_TABLE_SIZE 256
898
899 /* The size, in bits of the window, used in the "window method"
900 exponentiation algorithm. This is equivalent to a radix of
901 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
902 #define POWI_WINDOW_SIZE 3
903
904 /* The following table is an efficient representation of an
905 "optimal power tree". For each value, i, the corresponding
906 value, j, in the table states that an optimal evaluation
907 sequence for calculating pow(x,i) can be found by evaluating
908 pow(x,j)*pow(x,i-j). An optimal power tree for the first
909 100 integers is given in Knuth's "Seminumerical algorithms". */
910
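/* Worked example: powi_table[5] == 3, so x**5 is computed as
   x**3 * x**2; recursively, powi_table[3] == 2 gives x**3 == x**2 * x,
   and x**2 == x * x, for three multiplications in total.  */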
911 static const unsigned char powi_table[POWI_TABLE_SIZE] =
912 {
913 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
914 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
915 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
916 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
917 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
918 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
919 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
920 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
921 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
922 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
923 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
924 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
925 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
926 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
927 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
928 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
929 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
930 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
931 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
932 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
933 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
934 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
935 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
936 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
937 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
938 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
939 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
940 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
941 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
942 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
943 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
944 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
945 };
946
947
948 /* Return the number of multiplications required to calculate
949 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
950 subroutine of powi_cost. CACHE is an array indicating
951 which exponents have already been calculated. */
952
953 static int
954 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
955 {
956 /* If we've already calculated this exponent, then this evaluation
957 doesn't require any additional multiplications. */
958 if (cache[n])
959 return 0;
960
961 cache[n] = true;
962 return powi_lookup_cost (n - powi_table[n], cache)
963 + powi_lookup_cost (powi_table[n], cache) + 1;
964 }
965
966 /* Return the number of multiplications required to calculate
967 powi(x,n) for an arbitrary x, given the exponent N. This
968 function needs to be kept in sync with powi_as_mults below. */
969
970 static int
971 powi_cost (HOST_WIDE_INT n)
972 {
973 bool cache[POWI_TABLE_SIZE];
974 unsigned HOST_WIDE_INT digit;
975 unsigned HOST_WIDE_INT val;
976 int result;
977
978 if (n == 0)
979 return 0;
980
981 /* Ignore the reciprocal when calculating the cost. */
982 val = (n < 0) ? -n : n;
983
984 /* Initialize the exponent cache. */
985 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
986 cache[1] = true;
987
988 result = 0;
989
990 while (val >= POWI_TABLE_SIZE)
991 {
992 if (val & 1)
993 {
994 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
995 result += powi_lookup_cost (digit, cache)
996 + POWI_WINDOW_SIZE + 1;
997 val >>= POWI_WINDOW_SIZE;
998 }
999 else
1000 {
1001 val >>= 1;
1002 result++;
1003 }
1004 }
1005
1006 return result + powi_lookup_cost (val, cache);
1007 }
1008
1009 /* Recursive subroutine of powi_as_mults. This function takes the
1010 array, CACHE, of already calculated exponents and an exponent N and
1011 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1012
1013 static tree
1014 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1015 HOST_WIDE_INT n, tree *cache)
1016 {
1017 tree op0, op1, ssa_target;
1018 unsigned HOST_WIDE_INT digit;
1019 gassign *mult_stmt;
1020
1021 if (n < POWI_TABLE_SIZE && cache[n])
1022 return cache[n];
1023
1024 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1025
1026 if (n < POWI_TABLE_SIZE)
1027 {
1028 cache[n] = ssa_target;
1029 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1030 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1031 }
1032 else if (n & 1)
1033 {
1034 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1035 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1036 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1037 }
1038 else
1039 {
1040 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1041 op1 = op0;
1042 }
1043
1044 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1045 gimple_set_location (mult_stmt, loc);
1046 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1047
1048 return ssa_target;
1049 }
1050
1051 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1052 This function needs to be kept in sync with powi_cost above. */
1053
1054 static tree
1055 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1056 tree arg0, HOST_WIDE_INT n)
1057 {
1058 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1059 gassign *div_stmt;
1060 tree target;
1061
1062 if (n == 0)
1063 return build_real (type, dconst1);
1064
1065 memset (cache, 0, sizeof (cache));
1066 cache[1] = arg0;
1067
1068 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1069 if (n >= 0)
1070 return result;
1071
1072 /* If the original exponent was negative, reciprocate the result. */
1073 target = make_temp_ssa_name (type, NULL, "powmult");
1074 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1075 build_real (type, dconst1), result);
1076 gimple_set_location (div_stmt, loc);
1077 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1078
1079 return target;
1080 }
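
/* For example, powi_as_mults (gsi, loc, x, -2) conceptually emits
     powmult_1 = x * x;
     powmult_2 = 1.0 / powmult_1;
   and returns the SSA name holding powmult_2.  */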
1081
1082 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1083 location info LOC. If the arguments are appropriate, create an
1084 equivalent sequence of statements prior to GSI using an optimal
1085 number of multiplications, and return an expression holding the
1086 result. */
1087
1088 static tree
1089 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1090 tree arg0, HOST_WIDE_INT n)
1091 {
1092 /* Avoid the largest negative number, for which n == -n. */
1093 if (n != -n
1094 && ((n >= -1 && n <= 2)
1095 || (optimize_function_for_speed_p (cfun)
1096 && powi_cost (n) <= POWI_MAX_MULTS)))
1097 return powi_as_mults (gsi, loc, arg0, n);
1098
1099 return NULL_TREE;
1100 }
1101
1102 /* Build a gimple call statement that calls FN with argument ARG.
1103 Set the lhs of the call statement to a fresh SSA name. Insert the
1104 statement prior to GSI's current position, and return the fresh
1105 SSA name. */
1106
1107 static tree
1108 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1109 tree fn, tree arg)
1110 {
1111 gcall *call_stmt;
1112 tree ssa_target;
1113
1114 call_stmt = gimple_build_call (fn, 1, arg);
1115 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1116 gimple_set_lhs (call_stmt, ssa_target);
1117 gimple_set_location (call_stmt, loc);
1118 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1119
1120 return ssa_target;
1121 }
1122
1123 /* Build a gimple binary operation with the given CODE and arguments
1124 ARG0, ARG1, assigning the result to a new SSA name based on NAME.
1125 Insert the statement prior to GSI's current position, and
1126 return the fresh SSA name. */
1127
1128 static tree
1129 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1130 const char *name, enum tree_code code,
1131 tree arg0, tree arg1)
1132 {
1133 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1134 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1135 gimple_set_location (stmt, loc);
1136 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1137 return result;
1138 }
1139
1140 /* Build a gimple reference operation with the given CODE and argument
1141 ARG, assigning the result to a new SSA name of TYPE with NAME.
1142 Insert the statement prior to GSI's current position, and return
1143 the fresh SSA name. */
1144
1145 static inline tree
1146 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1147 const char *name, enum tree_code code, tree arg0)
1148 {
1149 tree result = make_temp_ssa_name (type, NULL, name);
1150 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1151 gimple_set_location (stmt, loc);
1152 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1153 return result;
1154 }
1155
1156 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1157 prior to GSI's current position, and return the fresh SSA name. */
1158
1159 static tree
1160 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1161 tree type, tree val)
1162 {
1163 tree result = make_ssa_name (type);
1164 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1165 gimple_set_location (stmt, loc);
1166 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1167 return result;
1168 }
1169
1170 struct pow_synth_sqrt_info
1171 {
1172 bool *factors;
1173 unsigned int deepest;
1174 unsigned int num_mults;
1175 };
1176
1177 /* Return true iff the real value C can be represented as a
1178 sum of powers of 0.5 up to N. That is:
1179 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1180 Record in INFO the various parameters of the synthesis algorithm such
1181 as the factors a[i], the maximum 0.5 power and the number of
1182 multiplications that will be required. */
1183
1184 bool
1185 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1186 struct pow_synth_sqrt_info *info)
1187 {
1188 REAL_VALUE_TYPE factor = dconsthalf;
1189 REAL_VALUE_TYPE remainder = c;
1190
1191 info->deepest = 0;
1192 info->num_mults = 0;
1193 memset (info->factors, 0, n * sizeof (bool));
1194
1195 for (unsigned i = 0; i < n; i++)
1196 {
1197 REAL_VALUE_TYPE res;
1198
1199 /* If something inexact happened bail out now. */
1200 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1201 return false;
1202
1203 /* We have hit zero. The number is representable as a sum
1204 of powers of 0.5. */
1205 if (real_equal (&res, &dconst0))
1206 {
1207 info->factors[i] = true;
1208 info->deepest = i + 1;
1209 return true;
1210 }
1211 else if (!REAL_VALUE_NEGATIVE (res))
1212 {
1213 remainder = res;
1214 info->factors[i] = true;
1215 info->num_mults++;
1216 }
1217 else
1218 info->factors[i] = false;
1219
1220 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1221 }
1222 return false;
1223 }
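
/* Worked example: c == 0.375 == 0.5**2 + 0.5**3, so with n >= 3 the
   loop records factors[1] and factors[2], sets deepest == 3 and
   num_mults == 1 (one multiply joining the two sqrt chains), i.e.
   pow (x, 0.375) == sqrt (sqrt (x)) * sqrt (sqrt (sqrt (x))).  */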
1224
1225 /* Return the tree corresponding to FN being applied
1226 to ARG N times at GSI and LOC.
1227 Look up previous results from CACHE if need be.
1228 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1229
1230 static tree
1231 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1232 tree fn, location_t loc, tree *cache)
1233 {
1234 tree res = cache[n];
1235 if (!res)
1236 {
1237 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1238 res = build_and_insert_call (gsi, loc, fn, prev);
1239 cache[n] = res;
1240 }
1241
1242 return res;
1243 }
1244
1245 /* Print to STREAM the repeated application of function FNAME to ARG
1246 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1247 "foo (foo (x))". */
1248
1249 static void
1250 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1251 unsigned int n)
1252 {
1253 if (n == 0)
1254 fprintf (stream, "%s", arg);
1255 else
1256 {
1257 fprintf (stream, "%s (", fname);
1258 print_nested_fn (stream, fname, arg, n - 1);
1259 fprintf (stream, ")");
1260 }
1261 }
1262
1263 /* Print to STREAM the fractional sequence of sqrt chains
1264 applied to ARG, described by INFO. Used for the dump file. */
1265
1266 static void
1267 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1268 struct pow_synth_sqrt_info *info)
1269 {
1270 for (unsigned int i = 0; i < info->deepest; i++)
1271 {
1272 bool is_set = info->factors[i];
1273 if (is_set)
1274 {
1275 print_nested_fn (stream, "sqrt", arg, i + 1);
1276 if (i != info->deepest - 1)
1277 fprintf (stream, " * ");
1278 }
1279 }
1280 }
1281
1282 /* Print to STREAM a representation of raising ARG to an integer
1283 power N. Used for the dump file. */
1284
1285 static void
1286 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1287 {
1288 if (n > 1)
1289 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1290 else if (n == 1)
1291 fprintf (stream, "%s", arg);
1292 }
1293
1294 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1295 square roots. Place at GSI and LOC. Limit the maximum depth
1296 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1297 result of the expanded sequence or NULL_TREE if the expansion failed.
1298
1299 This routine assumes that ARG1 is a real number with a fractional part
1300 (the integer exponent case will have been handled earlier in
1301 gimple_expand_builtin_pow).
1302
1303 For ARG1 > 0.0:
1304 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1305 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1306 FRAC_PART == ARG1 - WHOLE_PART:
1307 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1308 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1309 if it can be expressed as such, that is if FRAC_PART satisfies:
1310 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1311 where integer a[i] is either 0 or 1.
1312
1313 Example:
1314 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1315 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1316
1317 For ARG1 < 0.0 there are two approaches:
1318 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1319 is calculated as above.
1320
1321 Example:
1322 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1323 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1324
1325 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1326 FRAC_PART := ARG1 - WHOLE_PART
1327 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1328 Example:
1329 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1330 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1331
1332 For ARG1 < 0.0 we choose between (A) and (B) depending on
1333 how many multiplications we'd have to do.
1334 So, for the example in (B): POW (x, -5.875), if we were to
1335 follow algorithm (A) we would produce:
1336 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1337 which contains more multiplications than approach (B).
1338
1339 Hopefully, this approach will eliminate potentially expensive POW library
1340 calls when unsafe floating point math is enabled and allow the compiler to
1341 further optimise the multiplies, square roots and divides produced by this
1342 function. */
1343
1344 static tree
1345 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1346 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1347 {
1348 tree type = TREE_TYPE (arg0);
1349 machine_mode mode = TYPE_MODE (type);
1350 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1351 bool one_over = true;
1352
1353 if (!sqrtfn)
1354 return NULL_TREE;
1355
1356 if (TREE_CODE (arg1) != REAL_CST)
1357 return NULL_TREE;
1358
1359 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1360
1361 gcc_assert (max_depth > 0);
1362 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1363
1364 struct pow_synth_sqrt_info synth_info;
1365 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1366 synth_info.deepest = 0;
1367 synth_info.num_mults = 0;
1368
1369 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1370 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1371
1372 /* The whole and fractional parts of exp. */
1373 REAL_VALUE_TYPE whole_part;
1374 REAL_VALUE_TYPE frac_part;
1375
1376 real_floor (&whole_part, mode, &exp);
1377 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1378
1379
1380 REAL_VALUE_TYPE ceil_whole = dconst0;
1381 REAL_VALUE_TYPE ceil_fract = dconst0;
1382
1383 if (neg_exp)
1384 {
1385 real_ceil (&ceil_whole, mode, &exp);
1386 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1387 }
1388
1389 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1390 return NULL_TREE;
1391
1392 /* Check whether it's more profitable to not use 1.0 / ... */
1393 if (neg_exp)
1394 {
1395 struct pow_synth_sqrt_info alt_synth_info;
1396 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1397 alt_synth_info.deepest = 0;
1398 alt_synth_info.num_mults = 0;
1399
1400 if (representable_as_half_series_p (ceil_fract, max_depth,
1401 &alt_synth_info)
1402 && alt_synth_info.deepest <= synth_info.deepest
1403 && alt_synth_info.num_mults < synth_info.num_mults)
1404 {
1405 whole_part = ceil_whole;
1406 frac_part = ceil_fract;
1407 synth_info.deepest = alt_synth_info.deepest;
1408 synth_info.num_mults = alt_synth_info.num_mults;
1409 memcpy (synth_info.factors, alt_synth_info.factors,
1410 (max_depth + 1) * sizeof (bool));
1411 one_over = false;
1412 }
1413 }
1414
1415 HOST_WIDE_INT n = real_to_integer (&whole_part);
1416 REAL_VALUE_TYPE cint;
1417 real_from_integer (&cint, VOIDmode, n, SIGNED);
1418
1419 if (!real_identical (&whole_part, &cint))
1420 return NULL_TREE;
1421
1422 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1423 return NULL_TREE;
1424
1425 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1426
1427 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1428
1429 /* Calculate the integer part of the exponent. */
1430 if (n > 1)
1431 {
1432 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1433 if (!integer_res)
1434 return NULL_TREE;
1435 }
1436
1437 if (dump_file)
1438 {
1439 char string[64];
1440
1441 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1442 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1443
1444 if (neg_exp)
1445 {
1446 if (one_over)
1447 {
1448 fprintf (dump_file, "1.0 / (");
1449 dump_integer_part (dump_file, "x", n);
1450 if (n > 0)
1451 fprintf (dump_file, " * ");
1452 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1453 fprintf (dump_file, ")");
1454 }
1455 else
1456 {
1457 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1458 fprintf (dump_file, " / (");
1459 dump_integer_part (dump_file, "x", n);
1460 fprintf (dump_file, ")");
1461 }
1462 }
1463 else
1464 {
1465 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1466 if (n > 0)
1467 fprintf (dump_file, " * ");
1468 dump_integer_part (dump_file, "x", n);
1469 }
1470
1471 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1472 }
1473
1474
1475 tree fract_res = NULL_TREE;
1476 cache[0] = arg0;
1477
1478 /* Calculate the fractional part of the exponent. */
1479 for (unsigned i = 0; i < synth_info.deepest; i++)
1480 {
1481 if (synth_info.factors[i])
1482 {
1483 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1484
1485 if (!fract_res)
1486 fract_res = sqrt_chain;
1487
1488 else
1489 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1490 fract_res, sqrt_chain);
1491 }
1492 }
1493
1494 tree res = NULL_TREE;
1495
1496 if (neg_exp)
1497 {
1498 if (one_over)
1499 {
1500 if (n > 0)
1501 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1502 fract_res, integer_res);
1503 else
1504 res = fract_res;
1505
1506 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1507 build_real (type, dconst1), res);
1508 }
1509 else
1510 {
1511 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1512 fract_res, integer_res);
1513 }
1514 }
1515 else
1516 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1517 fract_res, integer_res);
1518 return res;
1519 }
1520
1521 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1522 with location info LOC. If possible, create an equivalent and
1523 less expensive sequence of statements prior to GSI, and return an
1524 expression holding the result. */
1525
1526 static tree
1527 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1528 tree arg0, tree arg1)
1529 {
1530 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1531 REAL_VALUE_TYPE c2, dconst3;
1532 HOST_WIDE_INT n;
1533 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1534 machine_mode mode;
1535 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1536 bool hw_sqrt_exists, c_is_int, c2_is_int;
1537
1538 dconst1_4 = dconst1;
1539 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1540
1541 /* If the exponent isn't a constant, there's nothing of interest
1542 to be done. */
1543 if (TREE_CODE (arg1) != REAL_CST)
1544 return NULL_TREE;
1545
1546 /* Don't perform the operation if flag_signaling_nans is on
1547 and the operand is a signaling NaN. */
1548 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1549 && ((TREE_CODE (arg0) == REAL_CST
1550 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1551 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1552 return NULL_TREE;
1553
1554 /* If the exponent is equivalent to an integer, expand to an optimal
1555 multiplication sequence when profitable. */
1556 c = TREE_REAL_CST (arg1);
1557 n = real_to_integer (&c);
1558 real_from_integer (&cint, VOIDmode, n, SIGNED);
1559 c_is_int = real_identical (&c, &cint);
1560
1561 if (c_is_int
1562 && ((n >= -1 && n <= 2)
1563 || (flag_unsafe_math_optimizations
1564 && speed_p
1565 && powi_cost (n) <= POWI_MAX_MULTS)))
1566 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1567
1568 /* Attempt various optimizations using sqrt and cbrt. */
1569 type = TREE_TYPE (arg0);
1570 mode = TYPE_MODE (type);
1571 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1572
1573 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1574 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1575 sqrt(-0) = -0. */
1576 if (sqrtfn
1577 && real_equal (&c, &dconsthalf)
1578 && !HONOR_SIGNED_ZEROS (mode))
1579 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1580
1581 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1582
1583 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1584 optimizations since 1./3. is not exactly representable. If x
1585 is negative and finite, the correct value of pow(x,1./3.) is
1586 a NaN with the "invalid" exception raised, because the value
1587 of 1./3. actually has an even denominator. The correct value
1588 of cbrt(x) is a negative real value. */
1589 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1590 dconst1_3 = real_value_truncate (mode, dconst_third ());
1591
1592 if (flag_unsafe_math_optimizations
1593 && cbrtfn
1594 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1595 && real_equal (&c, &dconst1_3))
1596 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1597
1598 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1599 if we don't have a hardware sqrt insn. */
1600 dconst1_6 = dconst1_3;
1601 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1602
1603 if (flag_unsafe_math_optimizations
1604 && sqrtfn
1605 && cbrtfn
1606 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1607 && speed_p
1608 && hw_sqrt_exists
1609 && real_equal (&c, &dconst1_6))
1610 {
1611 /* sqrt(x) */
1612 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1613
1614 /* cbrt(sqrt(x)) */
1615 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1616 }
1617
1618
1619 /* Attempt to expand the POW as a product of square root chains.
1620 Expand the 0.25 case even when optimising for size. */
1621 if (flag_unsafe_math_optimizations
1622 && sqrtfn
1623 && hw_sqrt_exists
1624 && (speed_p || real_equal (&c, &dconst1_4))
1625 && !HONOR_SIGNED_ZEROS (mode))
1626 {
1627 unsigned int max_depth = speed_p
1628 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1629 : 2;
1630
1631 tree expand_with_sqrts
1632 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1633
1634 if (expand_with_sqrts)
1635 return expand_with_sqrts;
1636 }
1637
1638 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1639 n = real_to_integer (&c2);
1640 real_from_integer (&cint, VOIDmode, n, SIGNED);
1641 c2_is_int = real_identical (&c2, &cint);
1642
1643 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1644
1645 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1646 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1647
1648 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1649 different from pow(x, 1./3.) due to rounding and behavior with
1650 negative x, we need to constrain this transformation to unsafe
1651 math and positive x or finite math. */
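      /* For example, c == 5./3. gives n == 5, so the result is built as
	 powi (x, 5/3) * powi (cbrt (x), 5%3) == x * (cbrt (x) * cbrt (x)).  */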
1652 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1653 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1654 real_round (&c2, mode, &c2);
1655 n = real_to_integer (&c2);
1656 real_from_integer (&cint, VOIDmode, n, SIGNED);
1657 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1658 real_convert (&c2, mode, &c2);
1659
1660 if (flag_unsafe_math_optimizations
1661 && cbrtfn
1662 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1663 && real_identical (&c2, &c)
1664 && !c2_is_int
1665 && optimize_function_for_speed_p (cfun)
1666 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1667 {
1668 tree powi_x_ndiv3 = NULL_TREE;
1669
1670 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1671 possible or profitable, give up. Skip the degenerate case when
1672 abs(n) < 3, where the result is always 1. */
1673 if (absu_hwi (n) >= 3)
1674 {
1675 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1676 abs_hwi (n / 3));
1677 if (!powi_x_ndiv3)
1678 return NULL_TREE;
1679 }
1680
1681 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1682 as that creates an unnecessary variable. Instead, just produce
1683 either cbrt(x) or cbrt(x) * cbrt(x). */
1684 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1685
1686 if (absu_hwi (n) % 3 == 1)
1687 powi_cbrt_x = cbrt_x;
1688 else
1689 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1690 cbrt_x, cbrt_x);
1691
1692 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1693 if (absu_hwi (n) < 3)
1694 result = powi_cbrt_x;
1695 else
1696 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1697 powi_x_ndiv3, powi_cbrt_x);
1698
1699 /* If n is negative, reciprocate the result. */
1700 if (n < 0)
1701 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1702 build_real (type, dconst1), result);
1703
1704 return result;
1705 }
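
For a worked instance of the 3c = n case above, take c = 5/3, so n = 5: the factors are powi(x, 5/3) = powi(x, 1) = x and powi(cbrt(x), 5%3) = cbrt(x) * cbrt(x). A hedged source-level sketch of the replacement (hypothetical function name):

#include <math.h>

/* pow (x, 5./3.) after the rewrite; requires -funsafe-math-optimizations
   and nonnegative or finite-math x, per the constraints above. */
double p_five_thirds (double x)
{
  double c = cbrt (x);
  return x * (c * c);
}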
1706
1707 /* No optimizations succeeded. */
1708 return NULL_TREE;
1709 }
1710
1711 /* ARG is the argument to a cabs builtin call in GSI with location info
1712 LOC. Create a sequence of statements prior to GSI that calculates
1713 sqrt(R*R + I*I), where R and I are the real and imaginary components
1714 of ARG, respectively. Return an expression holding the result. */
1715
1716 static tree
1717 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1718 {
1719 tree real_part, imag_part, addend1, addend2, sum, result;
1720 tree type = TREE_TYPE (TREE_TYPE (arg));
1721 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1722 machine_mode mode = TYPE_MODE (type);
1723
1724 if (!flag_unsafe_math_optimizations
1725 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1726 || !sqrtfn
1727 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1728 return NULL_TREE;
1729
1730 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1731 REALPART_EXPR, arg);
1732 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1733 real_part, real_part);
1734 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1735 IMAGPART_EXPR, arg);
1736 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1737 imag_part, imag_part);
1738 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1739 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1740
1741 return result;
1742 }
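
The statement sequence built above is the plain modulus formula. A scalar sketch of the result (hypothetical helper; unlike a library cabs it has no guard against intermediate overflow for large components, which is one reason the expansion is gated on -funsafe-math-optimizations):

#include <math.h>

/* sqrt (r*r + i*i) on the real and imaginary components of the argument. */
static double cabs_expanded (double r, double i)
{
  return sqrt (r * r + i * i);
}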
1743
753 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 1744 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
754 on the SSA_NAME argument of each of them. */ 1745 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
755 1746 an optimal number of multiplies, when n is a constant. */
756 static unsigned int 1747
757 execute_cse_sincos (void) 1748 namespace {
1749
1750 const pass_data pass_data_cse_sincos =
1751 {
1752 GIMPLE_PASS, /* type */
1753 "sincos", /* name */
1754 OPTGROUP_NONE, /* optinfo_flags */
1755 TV_NONE, /* tv_id */
1756 PROP_ssa, /* properties_required */
1757 PROP_gimple_opt_math, /* properties_provided */
1758 0, /* properties_destroyed */
1759 0, /* todo_flags_start */
1760 TODO_update_ssa, /* todo_flags_finish */
1761 };
1762
1763 class pass_cse_sincos : public gimple_opt_pass
1764 {
1765 public:
1766 pass_cse_sincos (gcc::context *ctxt)
1767 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1768 {}
1769
1770 /* opt_pass methods: */
1771 virtual bool gate (function *)
1772 {
1773 /* We no longer require either sincos or cexp, since powi expansion
1774 piggybacks on this pass. */
1775 return optimize;
1776 }
1777
1778 virtual unsigned int execute (function *);
1779
1780 }; // class pass_cse_sincos
1781
1782 unsigned int
1783 pass_cse_sincos::execute (function *fun)
758 { 1784 {
759 basic_block bb; 1785 basic_block bb;
760 bool cfg_changed = false; 1786 bool cfg_changed = false;
761 1787
762 calculate_dominance_info (CDI_DOMINATORS); 1788 calculate_dominance_info (CDI_DOMINATORS);
763 1789 memset (&sincos_stats, 0, sizeof (sincos_stats));
764 FOR_EACH_BB (bb) 1790
1791 FOR_EACH_BB_FN (bb, fun)
765 { 1792 {
766 gimple_stmt_iterator gsi; 1793 gimple_stmt_iterator gsi;
1794 bool cleanup_eh = false;
767 1795
768 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1796 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
769 { 1797 {
770 gimple stmt = gsi_stmt (gsi); 1798 gimple *stmt = gsi_stmt (gsi);
771 tree fndecl; 1799
1800 /* Only the last stmt in a bb could throw, no need to call
1801 gimple_purge_dead_eh_edges if we change something in the middle
1802 of a basic block. */
1803 cleanup_eh = false;
772 1804
773 if (is_gimple_call (stmt) 1805 if (is_gimple_call (stmt)
774 && gimple_call_lhs (stmt) 1806 && gimple_call_lhs (stmt))
775 && (fndecl = gimple_call_fndecl (stmt))
776 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
777 { 1807 {
778 tree arg; 1808 tree arg, arg0, arg1, result;
779 1809 HOST_WIDE_INT n;
780 switch (DECL_FUNCTION_CODE (fndecl)) 1810 location_t loc;
1811
1812 switch (gimple_call_combined_fn (stmt))
781 { 1813 {
782 CASE_FLT_FN (BUILT_IN_COS): 1814 CASE_CFN_COS:
783 CASE_FLT_FN (BUILT_IN_SIN): 1815 CASE_CFN_SIN:
784 CASE_FLT_FN (BUILT_IN_CEXPI): 1816 CASE_CFN_CEXPI:
1817 /* Make sure we have either sincos or cexp. */
1818 if (!targetm.libc_has_function (function_c99_math_complex)
1819 && !targetm.libc_has_function (function_sincos))
1820 break;
1821
785 arg = gimple_call_arg (stmt, 0); 1822 arg = gimple_call_arg (stmt, 0);
786 if (TREE_CODE (arg) == SSA_NAME) 1823 if (TREE_CODE (arg) == SSA_NAME)
787 cfg_changed |= execute_cse_sincos_1 (arg); 1824 cfg_changed |= execute_cse_sincos_1 (arg);
788 break; 1825 break;
789 1826
1827 CASE_CFN_POW:
1828 arg0 = gimple_call_arg (stmt, 0);
1829 arg1 = gimple_call_arg (stmt, 1);
1830
1831 loc = gimple_location (stmt);
1832 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1833
1834 if (result)
1835 {
1836 tree lhs = gimple_get_lhs (stmt);
1837 gassign *new_stmt = gimple_build_assign (lhs, result);
1838 gimple_set_location (new_stmt, loc);
1839 unlink_stmt_vdef (stmt);
1840 gsi_replace (&gsi, new_stmt, true);
1841 cleanup_eh = true;
1842 if (gimple_vdef (stmt))
1843 release_ssa_name (gimple_vdef (stmt));
1844 }
1845 break;
1846
1847 CASE_CFN_POWI:
1848 arg0 = gimple_call_arg (stmt, 0);
1849 arg1 = gimple_call_arg (stmt, 1);
1850 loc = gimple_location (stmt);
1851
1852 if (real_minus_onep (arg0))
1853 {
1854 tree t0, t1, cond, one, minus_one;
1855 gassign *stmt;
1856
1857 t0 = TREE_TYPE (arg0);
1858 t1 = TREE_TYPE (arg1);
1859 one = build_real (t0, dconst1);
1860 minus_one = build_real (t0, dconstm1);
1861
1862 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1863 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1864 arg1, build_int_cst (t1, 1));
1865 gimple_set_location (stmt, loc);
1866 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1867
1868 result = make_temp_ssa_name (t0, NULL, "powi");
1869 stmt = gimple_build_assign (result, COND_EXPR, cond,
1870 minus_one, one);
1871 gimple_set_location (stmt, loc);
1872 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1873 }
1874 else
1875 {
1876 if (!tree_fits_shwi_p (arg1))
1877 break;
1878
1879 n = tree_to_shwi (arg1);
1880 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1881 }
1882
1883 if (result)
1884 {
1885 tree lhs = gimple_get_lhs (stmt);
1886 gassign *new_stmt = gimple_build_assign (lhs, result);
1887 gimple_set_location (new_stmt, loc);
1888 unlink_stmt_vdef (stmt);
1889 gsi_replace (&gsi, new_stmt, true);
1890 cleanup_eh = true;
1891 if (gimple_vdef (stmt))
1892 release_ssa_name (gimple_vdef (stmt));
1893 }
1894 break;
1895
1896 CASE_CFN_CABS:
1897 arg0 = gimple_call_arg (stmt, 0);
1898 loc = gimple_location (stmt);
1899 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1900
1901 if (result)
1902 {
1903 tree lhs = gimple_get_lhs (stmt);
1904 gassign *new_stmt = gimple_build_assign (lhs, result);
1905 gimple_set_location (new_stmt, loc);
1906 unlink_stmt_vdef (stmt);
1907 gsi_replace (&gsi, new_stmt, true);
1908 cleanup_eh = true;
1909 if (gimple_vdef (stmt))
1910 release_ssa_name (gimple_vdef (stmt));
1911 }
1912 break;
1913
790 default:; 1914 default:;
791 } 1915 }
792 } 1916 }
793 } 1917 }
794 } 1918 if (cleanup_eh)
795 1919 cfg_changed |= gimple_purge_dead_eh_edges (bb);
796 free_dominance_info (CDI_DOMINATORS); 1920 }
1921
1922 statistics_counter_event (fun, "sincos statements inserted",
1923 sincos_stats.inserted);
1924
797 return cfg_changed ? TODO_cleanup_cfg : 0; 1925 return cfg_changed ? TODO_cleanup_cfg : 0;
798 } 1926 }
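
The real_minus_onep branch in the CASE_CFN_POWI arm above encodes powi(-1, n) as a parity select on n, avoiding any multiply chain. A hedged scalar model of the powi_cond/powi temporaries it emits:

/* Hypothetical scalar equivalent: (-1.0)**n depends only on n & 1. */
static double powi_minus_one (long n)
{
  long cond = n & 1;          /* powi_cond */
  return cond ? -1.0 : 1.0;   /* powi */
}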
799 1927
800 static bool 1928 } // anon namespace
801 gate_cse_sincos (void) 1929
802 { 1930 gimple_opt_pass *
803 /* Make sure we have either sincos or cexp. */ 1931 make_pass_cse_sincos (gcc::context *ctxt)
804 return (TARGET_HAS_SINCOS 1932 {
805 || TARGET_C99_FUNCTIONS) 1933 return new pass_cse_sincos (ctxt);
806 && optimize; 1934 }
807 } 1935
808 1936 /* A symbolic number structure is used to detect byte permutation and selection
809 struct gimple_opt_pass pass_cse_sincos = 1937 patterns of a source. To achieve that, its field N contains an artificial
810 { 1938 number consisting of BITS_PER_MARKER sized markers tracking where each
811 { 1939 byte comes from in the source:
812 GIMPLE_PASS, 1940
813 "sincos", /* name */ 1941 0 - target byte has the value 0
814 gate_cse_sincos, /* gate */ 1942 FF - target byte has an unknown value (eg. due to sign extension)
815 execute_cse_sincos, /* execute */ 1943 1..size - marker value is the byte index in the source (0 for lsb).
816 NULL, /* sub */ 1944
817 NULL, /* next */ 1945 To detect permutations on memory sources (arrays and structures), the
818 0, /* static_pass_number */ 1946 following is also associated with a symbolic number:
819 TV_NONE, /* tv_id */ 1947 - a base address BASE_ADDR and an OFFSET giving the address of the source;
820 PROP_ssa, /* properties_required */ 1948 - a range which gives the difference between the highest and lowest accessed
821 0, /* properties_provided */ 1949 memory location to make such a symbolic number;
822 0, /* properties_destroyed */ 1950 - the address SRC of the source element of lowest address as a convenience
823 0, /* todo_flags_start */ 1951 to easily get BASE_ADDR + offset + lowest bytepos;
824 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa 1952 - the number of expressions N_OPS bitwise ORed together, to represent the
825 | TODO_verify_stmts /* todo_flags_finish */ 1953 approximate cost of the computation.
826 } 1954
1955 Note 1: the range is different from size as size reflects the size of the
1956 type of the current expression. For instance, for an array char a[],
1957 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
1958 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
1959 time a range of 1.
1960
1961 Note 2: for non-memory sources, range holds the same value as size.
1962
1963 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
1964
1965 struct symbolic_number {
1966 uint64_t n;
1967 tree type;
1968 tree base_addr;
1969 tree offset;
1970 HOST_WIDE_INT bytepos;
1971 tree src;
1972 tree alias_set;
1973 tree vuse;
1974 unsigned HOST_WIDE_INT range;
1975 int n_ops;
827 }; 1976 };
828 1977
829 /* A symbolic number is used to detect byte permutation and selection 1978 #define BITS_PER_MARKER 8
830 patterns. Therefore the field N contains an artificial number 1979 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
831 consisting of byte size markers: 1980 #define MARKER_BYTE_UNKNOWN MARKER_MASK
832 1981 #define HEAD_MARKER(n, size) \
833 0 - byte has the value 0 1982 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
834 1..size - byte contains the content of the byte 1942 FF - target byte has an unknown value (e.g. due to sign extension)
835 number indexed with that value minus one */ 1984 /* The number which the find_bswap_or_nop_1 result should match in
836 1985 order to have a nop. The number is masked according to the size of
837 struct symbolic_number { 1986 the symbolic number before using it. */
838 unsigned HOST_WIDEST_INT n; 1987 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
839 int size; 1988 (uint64_t)0x08070605 << 32 | 0x04030201)
840 }; 1989
1990 /* The number which the find_bswap_or_nop_1 result should match in
1991 order to have a byte swap. The number is masked according to the
1992 size of the symbolic number before using it. */
1993 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1994 (uint64_t)0x01020304 << 32 | 0x05060708)
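
Read as marker strings, CMPNOP is 0x0807060504030201 (byte i of the result comes from byte i of the source) and CMPXCHG is 0x0102030405060708 (the fully reversed order). A small sketch of the masking applied before matching narrower expressions, assuming BITS_PER_MARKER == 8 (hypothetical helper):

#include <stdint.h>

/* Keep only the low SIZE byte markers, as done before comparing a
   symbolic number narrower than 8 bytes; e.g. the 4-byte masks of
   CMPNOP and CMPXCHG are 0x04030201 and 0x01020304 respectively. */
static uint64_t
mask_markers (uint64_t n, int size)
{
  if (size < 8)
    n &= ((uint64_t) 1 << (size * 8)) - 1;
  return n;
}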
841 1995
842 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic 1996 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
843 number N. Return false if the requested operation is not permitted 1997 number N. Return false if the requested operation is not permitted
844 on a symbolic number. */ 1998 on a symbolic number. */
845 1999
846 static inline bool 2000 static inline bool
847 do_shift_rotate (enum tree_code code, 2001 do_shift_rotate (enum tree_code code,
848 struct symbolic_number *n, 2002 struct symbolic_number *n,
849 int count) 2003 int count)
850 { 2004 {
851 if (count % 8 != 0) 2005 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2006 unsigned head_marker;
2007
2008 if (count % BITS_PER_UNIT != 0)
852 return false; 2009 return false;
2010 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
853 2011
854 /* Zero out the extra bits of N in order to avoid them being shifted 2012 /* Zero out the extra bits of N in order to avoid them being shifted
855 into the significant bits. */ 2013 into the significant bits. */
856 if (n->size < (int)sizeof (HOST_WIDEST_INT)) 2014 if (size < 64 / BITS_PER_MARKER)
857 n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1; 2015 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
858 2016
859 switch (code) 2017 switch (code)
860 { 2018 {
861 case LSHIFT_EXPR: 2019 case LSHIFT_EXPR:
862 n->n <<= count; 2020 n->n <<= count;
863 break; 2021 break;
864 case RSHIFT_EXPR: 2022 case RSHIFT_EXPR:
2023 head_marker = HEAD_MARKER (n->n, size);
865 n->n >>= count; 2024 n->n >>= count;
2025 /* Arithmetic shift of signed type: result is dependent on the value. */
2026 if (!TYPE_UNSIGNED (n->type) && head_marker)
2027 for (i = 0; i < count / BITS_PER_MARKER; i++)
2028 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2029 << ((size - 1 - i) * BITS_PER_MARKER);
866 break; 2030 break;
867 case LROTATE_EXPR: 2031 case LROTATE_EXPR:
868 n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count)); 2032 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
869 break; 2033 break;
870 case RROTATE_EXPR: 2034 case RROTATE_EXPR:
871 n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count)); 2035 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
872 break; 2036 break;
873 default: 2037 default:
874 return false; 2038 return false;
875 } 2039 }
2040 /* Zero unused bits for size. */
2041 if (size < 64 / BITS_PER_MARKER)
2042 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
876 return true; 2043 return true;
877 } 2044 }
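
As a quick check of the marker arithmetic above, with BITS_PER_MARKER == 8 (hypothetical values): left-shifting the 4-byte identity number by one byte leaves a low marker of 0, recording that the low byte of the shifted value is known to be zero.

#include <assert.h>
#include <stdint.h>

int main (void)
{
  uint64_t n = 0x04030201;            /* 4-byte CMPNOP */
  n = (n << 8) & 0xffffffff;          /* LSHIFT_EXPR by 8 bits */
  assert (n == 0x03020100);           /* low byte now known-zero */
  return 0;
}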
878 2045
879 /* Perform sanity checking for the symbolic number N and the gimple 2046 /* Perform sanity checking for the symbolic number N and the gimple
880 statement STMT. */ 2047 statement STMT. */
881 2048
882 static inline bool 2049 static inline bool
883 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt) 2050 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
884 { 2051 {
885 tree lhs_type; 2052 tree lhs_type;
886 2053
887 lhs_type = gimple_expr_type (stmt); 2054 lhs_type = gimple_expr_type (stmt);
888 2055
889 if (TREE_CODE (lhs_type) != INTEGER_TYPE) 2056 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
890 return false; 2057 return false;
891 2058
892 if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT) 2059 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
893 return false; 2060 return false;
894 2061
895 return true; 2062 return true;
896 } 2063 }
897 2064
898 /* find_bswap_1 invokes itself recursively with N and tries to perform 2065 /* Initialize the symbolic number N for the bswap pass from the base element
899 the operation given by the rhs of STMT on the result. If the 2066 SRC manipulated by the bitwise OR expression. */
900 operation could successfully be executed the function returns the 2067
901 tree expression of the source operand and NULL otherwise. */ 2068 static bool
902 2069 init_symbolic_number (struct symbolic_number *n, tree src)
903 static tree 2070 {
904 find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit) 2071 int size;
2072
2073 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
2074 return false;
2075
2076 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2077 n->src = src;
2078
2079 /* Set up the symbolic number N by setting each byte to a value between 1 and
2080 the byte size of rhs1. The highest order byte is set to that size and
2081 the lowest order byte to 1. */
2082 n->type = TREE_TYPE (src);
2083 size = TYPE_PRECISION (n->type);
2084 if (size % BITS_PER_UNIT != 0)
2085 return false;
2086 size /= BITS_PER_UNIT;
2087 if (size > 64 / BITS_PER_MARKER)
2088 return false;
2089 n->range = size;
2090 n->n = CMPNOP;
2091 n->n_ops = 1;
2092
2093 if (size < 64 / BITS_PER_MARKER)
2094 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2095
2096 return true;
2097 }
2098
2099 /* Check if STMT might be a byte swap or a nop from a memory source and returns
2100 the answer. If so, REF is that memory source and the base of the memory area
2101 accessed and the offset of the access from that base are recorded in N. */
2102
2103 bool
2104 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
2105 {
2106 /* Leaf node is an array or component ref. Memorize its base and
2107 offset from base to compare to other such leaf nodes. */
2108 HOST_WIDE_INT bitsize, bitpos;
2109 machine_mode mode;
2110 int unsignedp, reversep, volatilep;
2111 tree offset, base_addr;
2112
2113 /* Not prepared to handle PDP endian. */
2114 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2115 return false;
2116
2117 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2118 return false;
2119
2120 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2121 &unsignedp, &reversep, &volatilep);
2122
2123 if (TREE_CODE (base_addr) == MEM_REF)
2124 {
2125 offset_int bit_offset = 0;
2126 tree off = TREE_OPERAND (base_addr, 1);
2127
2128 if (!integer_zerop (off))
2129 {
2130 offset_int boff, coff = mem_ref_offset (base_addr);
2131 boff = coff << LOG2_BITS_PER_UNIT;
2132 bit_offset += boff;
2133 }
2134
2135 base_addr = TREE_OPERAND (base_addr, 0);
2136
2137 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2138 if (wi::neg_p (bit_offset))
2139 {
2140 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2141 offset_int tem = wi::bit_and_not (bit_offset, mask);
2142 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2143 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2144 bit_offset -= tem;
2145 tem >>= LOG2_BITS_PER_UNIT;
2146 if (offset)
2147 offset = size_binop (PLUS_EXPR, offset,
2148 wide_int_to_tree (sizetype, tem));
2149 else
2150 offset = wide_int_to_tree (sizetype, tem);
2151 }
2152
2153 bitpos += bit_offset.to_shwi ();
2154 }
2155
2156 if (bitpos % BITS_PER_UNIT)
2157 return false;
2158 if (bitsize % BITS_PER_UNIT)
2159 return false;
2160 if (reversep)
2161 return false;
2162
2163 if (!init_symbolic_number (n, ref))
2164 return false;
2165 n->base_addr = base_addr;
2166 n->offset = offset;
2167 n->bytepos = bitpos / BITS_PER_UNIT;
2168 n->alias_set = reference_alias_ptr_type (ref);
2169 n->vuse = gimple_vuse (stmt);
2170 return true;
2171 }
2172
2173 /* Compute the symbolic number N representing the result of a bitwise OR on
2174 two symbolic numbers N1 and N2 whose source statements are respectively
2175 SOURCE_STMT1 and SOURCE_STMT2. */
2176
2177 static gimple *
2178 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
2179 gimple *source_stmt2, struct symbolic_number *n2,
2180 struct symbolic_number *n)
2181 {
2182 int i, size;
2183 uint64_t mask;
2184 gimple *source_stmt;
2185 struct symbolic_number *n_start;
2186
2187 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
2188 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2189 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2190 rhs1 = TREE_OPERAND (rhs1, 0);
2191 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
2192 if (TREE_CODE (rhs2) == BIT_FIELD_REF
2193 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
2194 rhs2 = TREE_OPERAND (rhs2, 0);
2195
2196 /* Sources are different, cancel bswap if they are not memory locations with
2197 the same base (array, structure, ...). */
2198 if (rhs1 != rhs2)
2199 {
2200 uint64_t inc;
2201 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2202 struct symbolic_number *toinc_n_ptr, *n_end;
2203 basic_block bb1, bb2;
2204
2205 if (!n1->base_addr || !n2->base_addr
2206 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2207 return NULL;
2208
2209 if (!n1->offset != !n2->offset
2210 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2211 return NULL;
2212
2213 if (n1->bytepos < n2->bytepos)
2214 {
2215 n_start = n1;
2216 start_sub = n2->bytepos - n1->bytepos;
2217 }
2218 else
2219 {
2220 n_start = n2;
2221 start_sub = n1->bytepos - n2->bytepos;
2222 }
2223
2224 bb1 = gimple_bb (source_stmt1);
2225 bb2 = gimple_bb (source_stmt2);
2226 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
2227 source_stmt = source_stmt1;
2228 else
2229 source_stmt = source_stmt2;
2230
2231 /* Find the highest address at which a load is performed and
2232 compute related info. */
2233 end1 = n1->bytepos + (n1->range - 1);
2234 end2 = n2->bytepos + (n2->range - 1);
2235 if (end1 < end2)
2236 {
2237 end = end2;
2238 end_sub = end2 - end1;
2239 }
2240 else
2241 {
2242 end = end1;
2243 end_sub = end1 - end2;
2244 }
2245 n_end = (end2 > end1) ? n2 : n1;
2246
2247 /* Find the symbolic number whose lsb is the most significant. */
2248 if (BYTES_BIG_ENDIAN)
2249 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2250 else
2251 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2252
2253 n->range = end - n_start->bytepos + 1;
2254
2255 /* Check that the range of memory covered can be represented by
2256 a symbolic number. */
2257 if (n->range > 64 / BITS_PER_MARKER)
2258 return NULL;
2259
2260 /* Reinterpret byte markers in the symbolic number holding the value of
2261 bigger weight according to target endianness. */
2262 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2263 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2264 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2265 {
2266 unsigned marker
2267 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2268 if (marker && marker != MARKER_BYTE_UNKNOWN)
2269 toinc_n_ptr->n += inc;
2270 }
2271 }
2272 else
2273 {
2274 n->range = n1->range;
2275 n_start = n1;
2276 source_stmt = source_stmt1;
2277 }
2278
2279 if (!n1->alias_set
2280 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2281 n->alias_set = n1->alias_set;
2282 else
2283 n->alias_set = ptr_type_node;
2284 n->vuse = n_start->vuse;
2285 n->base_addr = n_start->base_addr;
2286 n->offset = n_start->offset;
2287 n->src = n_start->src;
2288 n->bytepos = n_start->bytepos;
2289 n->type = n_start->type;
2290 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2291
2292 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2293 {
2294 uint64_t masked1, masked2;
2295
2296 masked1 = n1->n & mask;
2297 masked2 = n2->n & mask;
2298 if (masked1 && masked2 && masked1 != masked2)
2299 return NULL;
2300 }
2301 n->n = n1->n | n2->n;
2302 n->n_ops = n1->n_ops + n2->n_ops;
2303
2304 return source_stmt;
2305 }
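
To make the marker adjustment concrete, consider a little-endian 32-bit value assembled as lo | ((uint32_t) hi << 16) from two adjacent 16-bit loads at byte positions 0 and 2. A hedged model of the increment loop (it only skips 0 markers; the real code also excludes MARKER_BYTE_UNKNOWN):

#include <assert.h>
#include <stdint.h>

int main (void)
{
  uint64_t n1 = 0x0201;          /* 16-bit load of bytes 0-1, widened */
  uint64_t n2 = 0x02010000;      /* 16-bit load of bytes 2-3, << 16 */
  uint64_t inc = 2;              /* start_sub = 2 bytes */
  for (int i = 0; i < 4; i++, inc <<= 8)
    if ((n2 >> (i * 8)) & 0xff)  /* bump each live marker */
      n2 += inc;
  assert ((n1 | n2) == 0x04030201);  /* a 4-byte nop: plain load */
  return 0;
}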
2306
2307 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2308 the operation given by the rhs of STMT on the result. If the operation
2309 could successfully be executed the function returns a gimple stmt whose
2310 rhs's first tree is the expression of the source operand and NULL
2311 otherwise. */
2312
2313 static gimple *
2314 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
905 { 2315 {
906 enum tree_code code; 2316 enum tree_code code;
907 tree rhs1, rhs2 = NULL; 2317 tree rhs1, rhs2 = NULL;
908 gimple rhs1_stmt, rhs2_stmt; 2318 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
909 tree source_expr1;
910 enum gimple_rhs_class rhs_class; 2319 enum gimple_rhs_class rhs_class;
911 2320
912 if (!limit || !is_gimple_assign (stmt)) 2321 if (!limit || !is_gimple_assign (stmt))
913 return NULL_TREE; 2322 return NULL;
914 2323
915 rhs1 = gimple_assign_rhs1 (stmt); 2324 rhs1 = gimple_assign_rhs1 (stmt);
916 2325
2326 if (find_bswap_or_nop_load (stmt, rhs1, n))
2327 return stmt;
2328
2329 /* Handle BIT_FIELD_REF. */
2330 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2331 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2332 {
2333 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
2334 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
2335 if (bitpos % BITS_PER_UNIT == 0
2336 && bitsize % BITS_PER_UNIT == 0
2337 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
2338 {
2339 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
2340 if (BYTES_BIG_ENDIAN)
2341 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
2342
2343 /* Shift. */
2344 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
2345 return NULL;
2346
2347 /* Mask. */
2348 uint64_t mask = 0;
2349 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2350 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
2351 i++, tmp <<= BITS_PER_UNIT)
2352 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2353 n->n &= mask;
2354
2355 /* Convert. */
2356 n->type = TREE_TYPE (rhs1);
2357 if (!n->base_addr)
2358 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2359
2360 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
2361 }
2362
2363 return NULL;
2364 }
2365
917 if (TREE_CODE (rhs1) != SSA_NAME) 2366 if (TREE_CODE (rhs1) != SSA_NAME)
918 return NULL_TREE; 2367 return NULL;
919 2368
920 code = gimple_assign_rhs_code (stmt); 2369 code = gimple_assign_rhs_code (stmt);
921 rhs_class = gimple_assign_rhs_class (stmt); 2370 rhs_class = gimple_assign_rhs_class (stmt);
922 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); 2371 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
923 2372
934 if (code != BIT_AND_EXPR 2383 if (code != BIT_AND_EXPR
935 && code != LSHIFT_EXPR 2384 && code != LSHIFT_EXPR
936 && code != RSHIFT_EXPR 2385 && code != RSHIFT_EXPR
937 && code != LROTATE_EXPR 2386 && code != LROTATE_EXPR
938 && code != RROTATE_EXPR 2387 && code != RROTATE_EXPR
939 && code != NOP_EXPR 2388 && !CONVERT_EXPR_CODE_P (code))
940 && code != CONVERT_EXPR) 2389 return NULL;
941 return NULL_TREE; 2390
942 2391 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
943 source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1); 2392
944 2393 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
945 /* If find_bswap_1 returned NULL STMT is a leaf node and we have 2394 we have to initialize the symbolic number. */
946 to initialize the symbolic number. */ 2395 if (!source_stmt1)
947 if (!source_expr1) 2396 {
948 { 2397 if (gimple_assign_load_p (stmt)
949 /* Set up the symbolic number N by setting each byte to a 2398 || !init_symbolic_number (n, rhs1))
950 value between 1 and the byte size of rhs1. The highest 2399 return NULL;
951 order byte is set to n->size and the lowest order 2400 source_stmt1 = stmt;
952 byte to 1. */
953 n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
954 if (n->size % BITS_PER_UNIT != 0)
955 return NULL_TREE;
956 n->size /= BITS_PER_UNIT;
957 n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
958 (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
959
960 if (n->size < (int)sizeof (HOST_WIDEST_INT))
961 n->n &= ((unsigned HOST_WIDEST_INT)1 <<
962 (n->size * BITS_PER_UNIT)) - 1;
963
964 source_expr1 = rhs1;
965 } 2401 }
966 2402
967 switch (code) 2403 switch (code)
968 { 2404 {
969 case BIT_AND_EXPR: 2405 case BIT_AND_EXPR:
970 { 2406 {
971 int i; 2407 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
972 unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2); 2408 uint64_t val = int_cst_value (rhs2), mask = 0;
973 unsigned HOST_WIDEST_INT tmp = val; 2409 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
974 2410
975 /* Only constants masking full bytes are allowed. */ 2411 /* Only constants masking full bytes are allowed. */
976 for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT) 2412 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
977 if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff) 2413 if ((val & tmp) != 0 && (val & tmp) != tmp)
978 return NULL_TREE; 2414 return NULL;
979 2415 else if (val & tmp)
980 n->n &= val; 2416 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2417
2418 n->n &= mask;
981 } 2419 }
982 break; 2420 break;
983 case LSHIFT_EXPR: 2421 case LSHIFT_EXPR:
984 case RSHIFT_EXPR: 2422 case RSHIFT_EXPR:
985 case LROTATE_EXPR: 2423 case LROTATE_EXPR:
986 case RROTATE_EXPR: 2424 case RROTATE_EXPR:
987 if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2))) 2425 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
988 return NULL_TREE; 2426 return NULL;
989 break; 2427 break;
990 CASE_CONVERT: 2428 CASE_CONVERT:
991 { 2429 {
992 int type_size; 2430 int i, type_size, old_type_size;
993 2431 tree type;
994 type_size = TYPE_PRECISION (gimple_expr_type (stmt)); 2432
2433 type = gimple_expr_type (stmt);
2434 type_size = TYPE_PRECISION (type);
995 if (type_size % BITS_PER_UNIT != 0) 2435 if (type_size % BITS_PER_UNIT != 0)
996 return NULL_TREE; 2436 return NULL;
997 2437 type_size /= BITS_PER_UNIT;
998 if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT))) 2438 if (type_size > 64 / BITS_PER_MARKER)
2439 return NULL;
2440
2441 /* Sign extension: result is dependent on the value. */
2442 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2443 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2444 && HEAD_MARKER (n->n, old_type_size))
2445 for (i = 0; i < type_size - old_type_size; i++)
2446 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2447 << ((type_size - 1 - i) * BITS_PER_MARKER);
2448
2449 if (type_size < 64 / BITS_PER_MARKER)
999 { 2450 {
1000 /* If STMT casts to a smaller type mask out the bits not 2451 /* If STMT casts to a smaller type mask out the bits not
1001 belonging to the target type. */ 2452 belonging to the target type. */
1002 n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1; 2453 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
1003 } 2454 }
1004 n->size = type_size / BITS_PER_UNIT; 2455 n->type = type;
2456 if (!n->base_addr)
2457 n->range = type_size;
1005 } 2458 }
1006 break; 2459 break;
1007 default: 2460 default:
1008 return NULL_TREE; 2461 return NULL;
1009 }; 2462 };
1010 return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL; 2463 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
1011 } 2464 }
1012 2465
1013 /* Handle binary rhs. */ 2466 /* Handle binary rhs. */
1014 2467
1015 if (rhs_class == GIMPLE_BINARY_RHS) 2468 if (rhs_class == GIMPLE_BINARY_RHS)
1016 { 2469 {
1017 struct symbolic_number n1, n2; 2470 struct symbolic_number n1, n2;
1018 tree source_expr2; 2471 gimple *source_stmt, *source_stmt2;
1019 2472
1020 if (code != BIT_IOR_EXPR) 2473 if (code != BIT_IOR_EXPR)
1021 return NULL_TREE; 2474 return NULL;
1022 2475
1023 if (TREE_CODE (rhs2) != SSA_NAME) 2476 if (TREE_CODE (rhs2) != SSA_NAME)
1024 return NULL_TREE; 2477 return NULL;
1025 2478
1026 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); 2479 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1027 2480
1028 switch (code) 2481 switch (code)
1029 { 2482 {
1030 case BIT_IOR_EXPR: 2483 case BIT_IOR_EXPR:
1031 source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1); 2484 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
1032 2485
1033 if (!source_expr1) 2486 if (!source_stmt1)
1034 return NULL_TREE; 2487 return NULL;
1035 2488
1036 source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1); 2489 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
1037 2490
1038 if (source_expr1 != source_expr2 2491 if (!source_stmt2)
1039 || n1.size != n2.size) 2492 return NULL;
1040 return NULL_TREE; 2493
1041 2494 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
1042 n->size = n1.size; 2495 return NULL;
1043 n->n = n1.n | n2.n; 2496
2497 if (!n1.vuse != !n2.vuse
2498 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2499 return NULL;
2500
2501 source_stmt
2502 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2503
2504 if (!source_stmt)
2505 return NULL;
1044 2506
1045 if (!verify_symbolic_number_p (n, stmt)) 2507 if (!verify_symbolic_number_p (n, stmt))
1046 return NULL_TREE; 2508 return NULL;
1047 2509
1048 break; 2510 break;
1049 default: 2511 default:
1050 return NULL_TREE; 2512 return NULL;
1051 } 2513 }
1052 return source_expr1; 2514 return source_stmt;
1053 } 2515 }
1054 return NULL_TREE; 2516 return NULL;
1055 } 2517 }
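
This recursive walk is what lets the pass see through a hand-written swap such as the following (hypothetical user code); when the resulting symbolic number matches CMPXCHG, the whole expression can collapse to one __builtin_bswap32 call on targets with a bswap insn:

#include <stdint.h>

static uint32_t
swap32 (uint32_t x)
{
  return ((x & 0x000000ff) << 24)
         | ((x & 0x0000ff00) << 8)
         | ((x & 0x00ff0000) >> 8)
         | ((x & 0xff000000) >> 24);
}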
1056 2518
1057 /* Check if STMT completes a bswap implementation consisting of ORs, 2519 /* Check if STMT completes a bswap implementation or a read in a given
1058 SHIFTs and ANDs. Return the source tree expression on which the 2520 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
1059 byte swap is performed and NULL if no bswap was found. */ 2521 accordingly. It also sets N to represent the kind of operations
1060 2522 performed: size of the resulting expression and whether it works on
1061 static tree 2523 a memory source, and if so alias-set and vuse. Finally, the
1062 find_bswap (gimple stmt) 2524 function returns a stmt whose rhs's first tree is the source
1063 { 2525 expression. */
1064 /* The number which the find_bswap result should match in order to 2526
1065 have a full byte swap. The number is shifted to the left according 2527 static gimple *
1066 to the size of the symbolic number before using it. */ 2528 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
1067 unsigned HOST_WIDEST_INT cmp = 2529 {
1068 sizeof (HOST_WIDEST_INT) < 8 ? 0 : 2530 unsigned rsize;
1069 (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708; 2531 uint64_t tmpn, mask;
1070 2532 /* The number which the find_bswap_or_nop_1 result should match in order
1071 struct symbolic_number n; 2533 to have a full byte swap. The number is shifted to the right
1072 tree source_expr; 2534 according to the size of the symbolic number before using it. */
2535 uint64_t cmpxchg = CMPXCHG;
2536 uint64_t cmpnop = CMPNOP;
2537
2538 gimple *ins_stmt;
2539 int limit;
1073 2540
1074 /* The last parameter determines the search depth limit. It usually 2541 /* The last parameter determines the search depth limit. It usually
1075 correlates directly to the number of bytes to be touched. We 2542 correlates directly to the number n of bytes to be touched. We
1076 increase that number by one here in order to also cover signed -> 2543 increase that number by log2(n) + 1 here in order to also
1077 unsigned conversions of the src operand as can be seen in 2544 cover signed -> unsigned conversions of the src operand as can be seen
1078 libgcc. */ 2545 in libgcc, and for the initial shift/and operation of the src operand. */
1079 source_expr = find_bswap_1 (stmt, &n, 2546 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
1080 TREE_INT_CST_LOW ( 2547 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
1081 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1); 2548 ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
1082 2549
1083 if (!source_expr) 2550 if (!ins_stmt)
1084 return NULL_TREE; 2551 return NULL;
1085 2552
1086 /* Zero out the extra bits of N and CMP. */ 2553 /* Find real size of result (highest non-zero byte). */
1087 if (n.size < (int)sizeof (HOST_WIDEST_INT)) 2554 if (n->base_addr)
1088 { 2555 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
1089 unsigned HOST_WIDEST_INT mask = 2556 else
1090 ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1; 2557 rsize = n->range;
1091 2558
1092 n.n &= mask; 2559 /* Zero out the bits corresponding to untouched bytes in original gimple
1093 cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT; 2560 expression. */
1094 } 2561 if (n->range < (int) sizeof (int64_t))
1095 2562 {
1096 /* A complete byte swap should make the symbolic number to start 2563 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
1097 with the largest digit in the highest order byte. */ 2564 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
1098 if (cmp != n.n) 2565 cmpnop &= mask;
1099 return NULL_TREE; 2566 }
1100 2567
1101 return source_expr; 2568 /* Zero out the bits corresponding to unused bytes in the result of the
1102 } 2569 gimple expression. */
1103 2570 if (rsize < n->range)
1104 /* Find manual byte swap implementations and turn them into a bswap 2571 {
1105 builtin invokation. */ 2572 if (BYTES_BIG_ENDIAN)
1106 2573 {
1107 static unsigned int 2574 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
1108 execute_optimize_bswap (void) 2575 cmpxchg &= mask;
2576 cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
2577 }
2578 else
2579 {
2580 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2581 cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
2582 cmpnop &= mask;
2583 }
2584 n->range = rsize;
2585 }
2586
2587 /* A complete byte swap should make the symbolic number start with
2588 the largest digit in the highest order byte. An unchanged symbolic
2589 number indicates a read with the same endianness as the target architecture. */
2590 if (n->n == cmpnop)
2591 *bswap = false;
2592 else if (n->n == cmpxchg)
2593 *bswap = true;
2594 else
2595 return NULL;
2596
2597 /* Useless bit manipulation performed by code. */
2598 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
2599 return NULL;
2600
2601 n->range *= BITS_PER_UNIT;
2602 return ins_stmt;
2603 }
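
A symbolic number that matches CMPNOP rather than CMPXCHG is a read in target endianness: on a little-endian target, the shift-and-OR below is recognized as a plain 32-bit load (hypothetical user code):

#include <stdint.h>

static uint32_t
load_le32 (const uint8_t *p)
{
  return (uint32_t) p[0]
         | ((uint32_t) p[1] << 8)
         | ((uint32_t) p[2] << 16)
         | ((uint32_t) p[3] << 24);
}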
2604
2605 namespace {
2606
2607 const pass_data pass_data_optimize_bswap =
2608 {
2609 GIMPLE_PASS, /* type */
2610 "bswap", /* name */
2611 OPTGROUP_NONE, /* optinfo_flags */
2612 TV_NONE, /* tv_id */
2613 PROP_ssa, /* properties_required */
2614 0, /* properties_provided */
2615 0, /* properties_destroyed */
2616 0, /* todo_flags_start */
2617 0, /* todo_flags_finish */
2618 };
2619
2620 class pass_optimize_bswap : public gimple_opt_pass
2621 {
2622 public:
2623 pass_optimize_bswap (gcc::context *ctxt)
2624 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2625 {}
2626
2627 /* opt_pass methods: */
2628 virtual bool gate (function *)
2629 {
2630 return flag_expensive_optimizations && optimize;
2631 }
2632
2633 virtual unsigned int execute (function *);
2634
2635 }; // class pass_optimize_bswap
2636
2637 /* Perform the bswap optimization: replace the expression computed in the rhs
2638 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2639 Which of these alternatives replaces the rhs is given by N->base_addr (non
2640 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2641 load to perform are also given in N, while the builtin bswap invocation is
2642 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of
2643 the load statements used to construct the rhs in CUR_STMT, and N->range
2644 gives the size of the rhs expression for maintaining some statistics.
2645
2646 Note that if the replacement involves a load, CUR_STMT is moved just after
2647 INS_STMT to do the load with the same VUSE, which can move CUR_STMT to a
2648 different basic block. */
2649
2650 static bool
2651 bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
2652 tree bswap_type, tree load_type, struct symbolic_number *n,
2653 bool bswap)
2654 {
2655 gimple_stmt_iterator gsi;
2656 tree src, tmp, tgt;
2657 gimple *bswap_stmt;
2658
2659 gsi = gsi_for_stmt (cur_stmt);
2660 src = n->src;
2661 tgt = gimple_assign_lhs (cur_stmt);
2662
2663 /* Need to load the value from memory first. */
2664 if (n->base_addr)
2665 {
2666 gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
2667 tree addr_expr, addr_tmp, val_expr, val_tmp;
2668 tree load_offset_ptr, aligned_load_type;
2669 gimple *addr_stmt, *load_stmt;
2670 unsigned align;
2671 HOST_WIDE_INT load_offset = 0;
2672 basic_block ins_bb, cur_bb;
2673
2674 ins_bb = gimple_bb (ins_stmt);
2675 cur_bb = gimple_bb (cur_stmt);
2676 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
2677 return false;
2678
2679 align = get_object_alignment (src);
2680
2681 /* Move cur_stmt just before one of the loads of the original
2682 to ensure it has the same VUSE. See PR61517 for what could
2683 go wrong. */
2684 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
2685 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
2686 gsi_move_before (&gsi, &gsi_ins);
2687 gsi = gsi_for_stmt (cur_stmt);
2688
2689 /* Compute address to load from and cast according to the size
2690 of the load. */
2691 addr_expr = build_fold_addr_expr (unshare_expr (src));
2692 if (is_gimple_mem_ref_addr (addr_expr))
2693 addr_tmp = addr_expr;
2694 else
2695 {
2696 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2697 "load_src");
2698 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2699 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2700 }
2701
2702 /* Perform the load. */
2703 aligned_load_type = load_type;
2704 if (align < TYPE_ALIGN (load_type))
2705 aligned_load_type = build_aligned_type (load_type, align);
2706 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2707 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2708 load_offset_ptr);
2709
2710 if (!bswap)
2711 {
2712 if (n->range == 16)
2713 nop_stats.found_16bit++;
2714 else if (n->range == 32)
2715 nop_stats.found_32bit++;
2716 else
2717 {
2718 gcc_assert (n->range == 64);
2719 nop_stats.found_64bit++;
2720 }
2721
2722 /* Convert the result of the load if necessary. */
2723 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2724 {
2725 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2726 "load_dst");
2727 load_stmt = gimple_build_assign (val_tmp, val_expr);
2728 gimple_set_vuse (load_stmt, n->vuse);
2729 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2730 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2731 }
2732 else
2733 {
2734 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2735 gimple_set_vuse (cur_stmt, n->vuse);
2736 }
2737 update_stmt (cur_stmt);
2738
2739 if (dump_file)
2740 {
2741 fprintf (dump_file,
2742 "%d bit load in target endianness found at: ",
2743 (int) n->range);
2744 print_gimple_stmt (dump_file, cur_stmt, 0);
2745 }
2746 return true;
2747 }
2748 else
2749 {
2750 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2751 load_stmt = gimple_build_assign (val_tmp, val_expr);
2752 gimple_set_vuse (load_stmt, n->vuse);
2753 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2754 }
2755 src = val_tmp;
2756 }
2757 else if (!bswap)
2758 {
2759 gimple *g;
2760 if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
2761 {
2762 if (!is_gimple_val (src))
2763 return false;
2764 g = gimple_build_assign (tgt, NOP_EXPR, src);
2765 }
2766 else
2767 g = gimple_build_assign (tgt, src);
2768 if (n->range == 16)
2769 nop_stats.found_16bit++;
2770 else if (n->range == 32)
2771 nop_stats.found_32bit++;
2772 else
2773 {
2774 gcc_assert (n->range == 64);
2775 nop_stats.found_64bit++;
2776 }
2777 if (dump_file)
2778 {
2779 fprintf (dump_file,
2780 "%d bit reshuffle in target endianness found at: ",
2781 (int) n->range);
2782 print_gimple_stmt (dump_file, cur_stmt, 0);
2783 }
2784 gsi_replace (&gsi, g, true);
2785 return true;
2786 }
2787 else if (TREE_CODE (src) == BIT_FIELD_REF)
2788 src = TREE_OPERAND (src, 0);
2789
2790 if (n->range == 16)
2791 bswap_stats.found_16bit++;
2792 else if (n->range == 32)
2793 bswap_stats.found_32bit++;
2794 else
2795 {
2796 gcc_assert (n->range == 64);
2797 bswap_stats.found_64bit++;
2798 }
2799
2800 tmp = src;
2801
2802 /* Convert the src expression if necessary. */
2803 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2804 {
2805 gimple *convert_stmt;
2806
2807 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2808 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2809 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2810 }
2811
2812 /* Canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2813 values are considered, as rotation of 2N-bit values by N bits is generally
2814 not equivalent to a bswap. Consider for instance 0x01020304 r>> 16, which
2815 gives 0x03040102 while a bswap for that value is 0x04030201. */
2816 if (bswap && n->range == 16)
2817 {
2818 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2819 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2820 bswap_stmt = gimple_build_assign (NULL, src);
2821 }
2822 else
2823 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2824
2825 tmp = tgt;
2826
2827 /* Convert the result if necessary. */
2828 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2829 {
2830 gimple *convert_stmt;
2831
2832 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2833 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2834 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2835 }
2836
2837 gimple_set_lhs (bswap_stmt, tmp);
2838
2839 if (dump_file)
2840 {
2841 fprintf (dump_file, "%d bit bswap implementation found at: ",
2842 (int) n->range);
2843 print_gimple_stmt (dump_file, cur_stmt, 0);
2844 }
2845
2846 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2847 gsi_remove (&gsi, true);
2848 return true;
2849 }
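
The 16-bit canonicalization above relies on the fact that, for exactly two bytes, a rotate by half the width is the byte swap; a hedged sketch of the equivalent source form:

#include <stdint.h>

/* For 16 bits, rotate-by-8 equals bswap; for wider types it does not
   (0x01020304 r>> 16 is 0x03040102, not 0x04030201). */
static uint16_t
swap16 (uint16_t x)
{
  return (uint16_t) ((x << 8) | (x >> 8));
}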
2850
1104 /* Find manual byte swap implementations and turn them into a bswap 2851 /* Find manual byte swap implementations as well as loads in a given
1105 builtin invocation. */ 2852 endianness. Byte swaps are turned into a bswap builtin invocation
1106 2853 while endian loads are converted to a bswap builtin invocation or a
1107 static unsigned int 2854 simple load according to the target endianness. */
2855
2856 unsigned int
2857 pass_optimize_bswap::execute (function *fun)
1109 { 2858 {
1110 basic_block bb; 2859 basic_block bb;
1111 bool bswap32_p, bswap64_p; 2860 bool bswap32_p, bswap64_p;
1112 bool changed = false; 2861 bool changed = false;
1113 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; 2862 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1114 2863
1115 if (BITS_PER_UNIT != 8) 2864 if (BITS_PER_UNIT != 8)
1116 return 0; 2865 return 0;
1117 2866
1118 if (sizeof (HOST_WIDEST_INT) < 8) 2867 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1119 return 0;
1120
1121 bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
1122 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); 2868 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1123 bswap64_p = (built_in_decls[BUILT_IN_BSWAP64] 2869 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1124 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing 2870 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1125 || (bswap32_p && word_mode == SImode))); 2871 || (bswap32_p && word_mode == SImode)));
1126
1127 if (!bswap32_p && !bswap64_p)
1128 return 0;
1129 2872
1130 /* Determine the argument type of the builtins. The code later on 2873 /* Determine the argument type of the builtins. The code later on
1131 assumes that the return and argument type are the same. */ 2874 assumes that the return and argument type are the same. */
1132 if (bswap32_p) 2875 if (bswap32_p)
1133 { 2876 {
1134 tree fndecl = built_in_decls[BUILT_IN_BSWAP32]; 2877 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1135 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); 2878 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1136 } 2879 }
1137 2880
1138 if (bswap64_p) 2881 if (bswap64_p)
1139 { 2882 {
1140 tree fndecl = built_in_decls[BUILT_IN_BSWAP64]; 2883 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1141 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); 2884 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1142 } 2885 }
1143 2886
1144 FOR_EACH_BB (bb) 2887 memset (&nop_stats, 0, sizeof (nop_stats));
2888 memset (&bswap_stats, 0, sizeof (bswap_stats));
2889 calculate_dominance_info (CDI_DOMINATORS);
2890
2891 FOR_EACH_BB_FN (bb, fun)
1145 { 2892 {
1146 gimple_stmt_iterator gsi; 2893 gimple_stmt_iterator gsi;
1147 2894
1148 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2895 /* We do a reverse scan for bswap patterns to make sure we get the
2896 widest match. As bswap pattern matching doesn't handle previously
2897 inserted smaller bswap replacements as sub-patterns, the wider
2898 variant wouldn't be detected. */
2899 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1149 { 2900 {
1150 gimple stmt = gsi_stmt (gsi); 2901 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1151 tree bswap_src, bswap_type; 2902 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1152 tree bswap_tmp; 2903 enum tree_code code;
1153 tree fndecl = NULL_TREE; 2904 struct symbolic_number n;
1154 int type_size; 2905 bool bswap;
1155 gimple call; 2906
1156 2907 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1157 if (!is_gimple_assign (stmt) 2908 might be moved to a different basic block by bswap_replace and gsi
1158 || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR 2909 must not point to it if that's the case. Moving the gsi_prev
2910 there makes sure that gsi points to the statement previous to
2911 cur_stmt while still making sure that all statements are
2912 considered in this basic block. */
2913 gsi_prev (&gsi);
2914
2915 if (!is_gimple_assign (cur_stmt))
1159 continue; 2916 continue;
1160 2917
1161 type_size = TYPE_PRECISION (gimple_expr_type (stmt)); 2918 code = gimple_assign_rhs_code (cur_stmt);
1162 2919 switch (code)
1163 switch (type_size)
1164 { 2920 {
2921 case LROTATE_EXPR:
2922 case RROTATE_EXPR:
2923 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2924 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2925 % BITS_PER_UNIT)
2926 continue;
2927 /* Fall through. */
2928 case BIT_IOR_EXPR:
2929 break;
2930 default:
2931 continue;
2932 }
2933
2934 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2935
2936 if (!ins_stmt)
2937 continue;
2938
2939 switch (n.range)
2940 {
2941 case 16:
2942 /* Already in canonical form, nothing to do. */
2943 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2944 continue;
2945 load_type = bswap_type = uint16_type_node;
2946 break;
1165 case 32: 2947 case 32:
2948 load_type = uint32_type_node;
1166 if (bswap32_p) 2949 if (bswap32_p)
1167 { 2950 {
1168 fndecl = built_in_decls[BUILT_IN_BSWAP32]; 2951 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1169 bswap_type = bswap32_type; 2952 bswap_type = bswap32_type;
1170 } 2953 }
1171 break; 2954 break;
1172 case 64: 2955 case 64:
2956 load_type = uint64_type_node;
1173 if (bswap64_p) 2957 if (bswap64_p)
1174 { 2958 {
1175 fndecl = built_in_decls[BUILT_IN_BSWAP64]; 2959 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1176 bswap_type = bswap64_type; 2960 bswap_type = bswap64_type;
1177 } 2961 }
1178 break; 2962 break;
1179 default: 2963 default:
1180 continue; 2964 continue;
1181 } 2965 }
1182 2966
1183 if (!fndecl) 2967 if (bswap && !fndecl && n.range != 16)
1184 continue; 2968 continue;
1185 2969
1186 bswap_src = find_bswap (stmt); 2970 if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
1187 2971 &n, bswap))
1188 if (!bswap_src) 2972 changed = true;
1189 continue; 2973 }
1190 2974 }
1191 changed = true; 2975
1192 2976 statistics_counter_event (fun, "16-bit nop implementations found",
1193 bswap_tmp = bswap_src; 2977 nop_stats.found_16bit);
1194 2978 statistics_counter_event (fun, "32-bit nop implementations found",
1195 /* Convert the src expression if necessary. */ 2979 nop_stats.found_32bit);
1196 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type)) 2980 statistics_counter_event (fun, "64-bit nop implementations found",
1197 { 2981 nop_stats.found_64bit);
1198 gimple convert_stmt; 2982 statistics_counter_event (fun, "16-bit bswap implementations found",
1199 2983 bswap_stats.found_16bit);
1200 bswap_tmp = create_tmp_var (bswap_type, "bswapsrc"); 2984 statistics_counter_event (fun, "32-bit bswap implementations found",
1201 add_referenced_var (bswap_tmp); 2985 bswap_stats.found_32bit);
1202 bswap_tmp = make_ssa_name (bswap_tmp, NULL); 2986 statistics_counter_event (fun, "64-bit bswap implementations found",
1203 2987 bswap_stats.found_64bit);
1204 convert_stmt = gimple_build_assign_with_ops ( 2988
1205 CONVERT_EXPR, bswap_tmp, bswap_src, NULL); 2989 return (changed ? TODO_update_ssa : 0);
1206 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); 2990 }
1207 } 2991
1208 2992 } // anon namespace
1209 call = gimple_build_call (fndecl, 1, bswap_tmp); 2993
1210 2994 gimple_opt_pass *
1211 bswap_tmp = gimple_assign_lhs (stmt); 2995 make_pass_optimize_bswap (gcc::context *ctxt)
1212 2996 {
1213 /* Convert the result if necessary. */ 2997 return new pass_optimize_bswap (ctxt);
1214 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type)) 2998 }
1215 { 2999
1216 gimple convert_stmt; 3000 /* Return true if stmt is a type conversion operation that can be stripped
1217 3001 when used in a widening multiply operation. */
1218 bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1219 add_referenced_var (bswap_tmp);
1220 bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1221 convert_stmt = gimple_build_assign_with_ops (
1222 CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1223 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1224 }
1225
1226 gimple_call_set_lhs (call, bswap_tmp);
1227
1228 if (dump_file)
1229 {
1230 fprintf (dump_file, "%d bit bswap implementation found at: ",
1231 (int)type_size);
1232 print_gimple_stmt (dump_file, stmt, 0, 0);
1233 }
1234
1235 gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1236 gsi_remove (&gsi, true);
1237 }
1238 }
1239
1240 return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1241 | TODO_verify_stmts : 0);
1242 }
1243
1244 static bool 3002 static bool
1245 gate_optimize_bswap (void) 3003 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
1246 { 3004 {
1247 return flag_expensive_optimizations && optimize; 3005 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1248 } 3006
1249 3007 if (TREE_CODE (result_type) == INTEGER_TYPE)
1250 struct gimple_opt_pass pass_optimize_bswap = 3008 {
1251 { 3009 tree op_type;
1252 { 3010 tree inner_op_type;
1253 GIMPLE_PASS, 3011
1254 "bswap", /* name */ 3012 if (!CONVERT_EXPR_CODE_P (rhs_code))
1255 gate_optimize_bswap, /* gate */ 3013 return false;
1256 execute_optimize_bswap, /* execute */ 3014
1257 NULL, /* sub */ 3015 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
1258 NULL, /* next */ 3016
1259 0, /* static_pass_number */ 3017 /* If the type of OP has the same precision as the result, then
1260 TV_NONE, /* tv_id */ 3018 we can strip this conversion. The multiply operation will be
1261 PROP_ssa, /* properties_required */ 3019 selected to create the correct extension as a by-product. */
1262 0, /* properties_provided */ 3020 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
1263 0, /* properties_destroyed */ 3021 return true;
1264 0, /* todo_flags_start */ 3022
1265 0 /* todo_flags_finish */ 3023 /* We can also strip a conversion if it preserves the signed-ness of
1266 } 3024 /* We can also strip a conversion if it preserves the signedness of
1267 }; 3025 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1268 3026
1269 /* Return true if RHS is a suitable operand for a widening multiplication. 3027 /* If the inner-most type is unsigned, then we can strip any
3028 intermediate widening operation. If it's signed, then the
3029 intermediate widening operation must also be signed. */
3030 if ((TYPE_UNSIGNED (inner_op_type)
3031 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
3032 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
3033 return true;
3034
3035 return false;
3036 }
3037
3038 return rhs_code == FIXED_CONVERT_EXPR;
3039 }
3040
3041 /* Return true if RHS is a suitable operand for a widening multiplication,
3042 assuming a target type of TYPE.
1270 There are two cases: 3043 There are two cases:
1271 3044
1272 - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT 3045 - RHS makes some value at least twice as wide. Store that value
1273 if so, and store its type in *TYPE_OUT. 3046 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
1274 3047
1275 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, 3048 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1276 but leave *TYPE_OUT untouched. */ 3049 but leave *TYPE_OUT untouched. */
1277 3050
1278 static bool 3051 static bool
1279 is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) 3052 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
1280 { 3053 tree *new_rhs_out)
1281 gimple stmt; 3054 {
1282 tree type, type1, rhs1; 3055 gimple *stmt;
1283 enum tree_code rhs_code; 3056 tree type1, rhs1;
1284 3057
1285 if (TREE_CODE (rhs) == SSA_NAME) 3058 if (TREE_CODE (rhs) == SSA_NAME)
1286 { 3059 {
1287 type = TREE_TYPE (rhs);
1288 stmt = SSA_NAME_DEF_STMT (rhs); 3060 stmt = SSA_NAME_DEF_STMT (rhs);
1289 if (!is_gimple_assign (stmt)) 3061 if (is_gimple_assign (stmt))
1290 return false; 3062 {
1291 3063 if (! widening_mult_conversion_strippable_p (type, stmt))
1292 rhs_code = gimple_assign_rhs_code (stmt); 3064 rhs1 = rhs;
1293 if (TREE_CODE (type) == INTEGER_TYPE 3065 else
1294 ? !CONVERT_EXPR_CODE_P (rhs_code) 3066 {
1295 : rhs_code != FIXED_CONVERT_EXPR) 3067 rhs1 = gimple_assign_rhs1 (stmt);
1296 return false; 3068
1297 3069 if (TREE_CODE (rhs1) == INTEGER_CST)
1298 rhs1 = gimple_assign_rhs1 (stmt); 3070 {
3071 *new_rhs_out = rhs1;
3072 *type_out = NULL;
3073 return true;
3074 }
3075 }
3076 }
3077 else
3078 rhs1 = rhs;
3079
1299 type1 = TREE_TYPE (rhs1); 3080 type1 = TREE_TYPE (rhs1);
3081
1300 if (TREE_CODE (type1) != TREE_CODE (type) 3082 if (TREE_CODE (type1) != TREE_CODE (type)
1301 || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) 3083 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
1302 return false; 3084 return false;
1303 3085
1304 *new_rhs_out = rhs1; 3086 *new_rhs_out = rhs1;
1305 *type_out = type1; 3087 *type_out = type1;
1306 return true; 3088 return true;
1314 } 3096 }
1315 3097
1316 return false; 3098 return false;
1317 } 3099 }
1318 3100
1319 /* Return true if STMT performs a widening multiplication. If so, 3101 /* Return true if STMT performs a widening multiplication, assuming the
1320 store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT 3102 output type is TYPE. If so, store the unwidened types of the operands
1321 respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting 3103 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
1322 those operands to types *TYPE1_OUT and *TYPE2_OUT would give the 3104 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
1323 operands of the multiplication. */ 3105 and *TYPE2_OUT would give the operands of the multiplication. */
1324 3106
1325 static bool 3107 static bool
1326 is_widening_mult_p (gimple stmt, 3108 is_widening_mult_p (gimple *stmt,
1327 tree *type1_out, tree *rhs1_out, 3109 tree *type1_out, tree *rhs1_out,
1328 tree *type2_out, tree *rhs2_out) 3110 tree *type2_out, tree *rhs2_out)
1329 { 3111 {
1330 tree type; 3112 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1331 3113
1332 type = TREE_TYPE (gimple_assign_lhs (stmt));
1333 if (TREE_CODE (type) != INTEGER_TYPE 3114 if (TREE_CODE (type) != INTEGER_TYPE
1334 && TREE_CODE (type) != FIXED_POINT_TYPE) 3115 && TREE_CODE (type) != FIXED_POINT_TYPE)
1335 return false; 3116 return false;
1336 3117
1337 if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out)) 3118 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
3119 rhs1_out))
1338 return false; 3120 return false;
1339 3121
1340 if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out)) 3122 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
3123 rhs2_out))
1341 return false; 3124 return false;
1342 3125
1343 if (*type1_out == NULL) 3126 if (*type1_out == NULL)
1344 { 3127 {
1345 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) 3128 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
1352 if (!int_fits_type_p (*rhs2_out, *type1_out)) 3135 if (!int_fits_type_p (*rhs2_out, *type1_out))
1353 return false; 3136 return false;
1354 *type2_out = *type1_out; 3137 *type2_out = *type1_out;
1355 } 3138 }
1356 3139
3140 /* Ensure that the larger of the two operands comes first. */
3141 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3142 {
3143 std::swap (*type1_out, *type2_out);
3144 std::swap (*rhs1_out, *rhs2_out);
3145 }
3146
1357 return true; 3147 return true;
3148 }
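
As a hedged illustration of the ordering rule just above: with mixed-width operands, both unwidened types are returned and then swapped so the wider one comes first.

    /* Editor's sketch: after the std::swap, type1 is int (32 bits) and
       type2 is short (16 bits), regardless of the source order.  */
    long long
    wmul (short a, int b)
    {
      return (long long) a * (long long) b;
    }
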
3149
3150 /* Check to see if the CALL statement is an invocation of copysign
3151 with 1.0 as its first argument. */
3152 static bool
3153 is_copysign_call_with_1 (gimple *call)
3154 {
3155 gcall *c = dyn_cast <gcall *> (call);
3156 if (! c)
3157 return false;
3158
3159 enum combined_fn code = gimple_call_combined_fn (c);
3160
3161 if (code == CFN_LAST)
3162 return false;
3163
3164 if (builtin_fn_p (code))
3165 {
3166 switch (as_builtin_fn (code))
3167 {
3168 CASE_FLT_FN (BUILT_IN_COPYSIGN):
3169 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
3170 return real_onep (gimple_call_arg (c, 0));
3171 default:
3172 return false;
3173 }
3174 }
3175
3176 if (internal_fn_p (code))
3177 {
3178 switch (as_internal_fn (code))
3179 {
3180 case IFN_COPYSIGN:
3181 return real_onep (gimple_call_arg (c, 0));
3182 default:
3183 return false;
3184 }
3185 }
3186
3187 return false;
3188 }
3189
3190 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
3191 This only happens when the xorsign optab is defined; if the
3192 pattern is not a xorsign pattern or if expansion fails, FALSE is
3193 returned, otherwise TRUE is returned. */
3194 static bool
3195 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
3196 {
3197 tree treeop0, treeop1, lhs, type;
3198 location_t loc = gimple_location (stmt);
3199 lhs = gimple_assign_lhs (stmt);
3200 treeop0 = gimple_assign_rhs1 (stmt);
3201 treeop1 = gimple_assign_rhs2 (stmt);
3202 type = TREE_TYPE (lhs);
3203 machine_mode mode = TYPE_MODE (type);
3204
3205 if (HONOR_SNANS (type))
3206 return false;
3207
3208 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
3209 {
3210 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
3211 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
3212 {
3213 call0 = SSA_NAME_DEF_STMT (treeop1);
3214 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
3215 return false;
3216
3217 treeop1 = treeop0;
3218 }
3219 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
3220 return false;
3221
3222 gcall *c = as_a<gcall*> (call0);
3223 treeop0 = gimple_call_arg (c, 1);
3224
3225 gcall *call_stmt
3226 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
3227 gimple_set_lhs (call_stmt, lhs);
3228 gimple_set_location (call_stmt, loc);
3229 gsi_replace (gsi, call_stmt, true);
3230 return true;
3231 }
3232
3233 return false;
1358 } 3234 }
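
A small C sketch of the pattern this function matches (an editor's example; whether it fires depends on the target defining the xorsign optab and on signaling NaNs not being honored):

    /* x * copysign (1.0, y) can become IFN_XORSIGN (x, y), i.e. an
       XOR of x with the sign bit of y.  */
    double
    xsign (double x, double y)
    {
      return x * __builtin_copysign (1.0, y);
    }
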
1359 3235
1360 /* Process a single gimple statement STMT, which has a MULT_EXPR as 3236 /* Process a single gimple statement STMT, which has a MULT_EXPR as
1361 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return 3237 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
1362 value is true iff we converted the statement. */ 3238 value is true iff we converted the statement. */
1363 3239
1364 static bool 3240 static bool
1365 convert_mult_to_widen (gimple stmt) 3241 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
1366 { 3242 {
1367 tree lhs, rhs1, rhs2, type, type1, type2; 3243 tree lhs, rhs1, rhs2, type, type1, type2;
1368 enum insn_code handler; 3244 enum insn_code handler;
3245 machine_mode to_mode, from_mode, actual_mode;
3246 optab op;
3247 int actual_precision;
3248 location_t loc = gimple_location (stmt);
3249 bool from_unsigned1, from_unsigned2;
1369 3250
1370 lhs = gimple_assign_lhs (stmt); 3251 lhs = gimple_assign_lhs (stmt);
1371 type = TREE_TYPE (lhs); 3252 type = TREE_TYPE (lhs);
1372 if (TREE_CODE (type) != INTEGER_TYPE) 3253 if (TREE_CODE (type) != INTEGER_TYPE)
1373 return false; 3254 return false;
1374 3255
1375 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) 3256 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
1376 return false; 3257 return false;
1377 3258
1378 if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) 3259 to_mode = SCALAR_INT_TYPE_MODE (type);
1379 handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); 3260 from_mode = SCALAR_INT_TYPE_MODE (type1);
1380 else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) 3261 from_unsigned1 = TYPE_UNSIGNED (type1);
1381 handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); 3262 from_unsigned2 = TYPE_UNSIGNED (type2);
3263
3264 if (from_unsigned1 && from_unsigned2)
3265 op = umul_widen_optab;
3266 else if (!from_unsigned1 && !from_unsigned2)
3267 op = smul_widen_optab;
1382 else 3268 else
1383 handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); 3269 op = usmul_widen_optab;
3270
3271 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3272 0, &actual_mode);
1384 3273
1385 if (handler == CODE_FOR_nothing) 3274 if (handler == CODE_FOR_nothing)
3275 {
3276 if (op != smul_widen_optab)
3277 {
3278 /* We can use a signed multiply with unsigned types as long as
3279 there is a wider mode to use, or it is the smaller of the two
3280 types that is unsigned. Note that type1 >= type2, always. */
3281 if ((TYPE_UNSIGNED (type1)
3282 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3283 || (TYPE_UNSIGNED (type2)
3284 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3285 {
3286 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3287 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3288 return false;
3289 }
3290
3291 op = smul_widen_optab;
3292 handler = find_widening_optab_handler_and_mode (op, to_mode,
3293 from_mode, 0,
3294 &actual_mode);
3295
3296 if (handler == CODE_FOR_nothing)
3297 return false;
3298
3299 from_unsigned1 = from_unsigned2 = false;
3300 }
3301 else
3302 return false;
3303 }
3304
3305 /* Ensure that the inputs to the handler are in the correct precision
3306 for the opcode. This will be the full mode size. */
3307 actual_precision = GET_MODE_PRECISION (actual_mode);
3308 if (2 * actual_precision > TYPE_PRECISION (type))
1386 return false; 3309 return false;
1387 3310 if (actual_precision != TYPE_PRECISION (type1)
1388 gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); 3311 || from_unsigned1 != TYPE_UNSIGNED (type1))
1389 gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); 3312 rhs1 = build_and_insert_cast (gsi, loc,
3313 build_nonstandard_integer_type
3314 (actual_precision, from_unsigned1), rhs1);
3315 if (actual_precision != TYPE_PRECISION (type2)
3316 || from_unsigned2 != TYPE_UNSIGNED (type2))
3317 rhs2 = build_and_insert_cast (gsi, loc,
3318 build_nonstandard_integer_type
3319 (actual_precision, from_unsigned2), rhs2);
3320
3321 /* Handle constants. */
3322 if (TREE_CODE (rhs1) == INTEGER_CST)
3323 rhs1 = fold_convert (type1, rhs1);
3324 if (TREE_CODE (rhs2) == INTEGER_CST)
3325 rhs2 = fold_convert (type2, rhs2);
3326
3327 gimple_assign_set_rhs1 (stmt, rhs1);
3328 gimple_assign_set_rhs2 (stmt, rhs2);
1390 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); 3329 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
1391 update_stmt (stmt); 3330 update_stmt (stmt);
3331 widen_mul_stats.widen_mults_inserted++;
1392 return true; 3332 return true;
1393 } 3333 }
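
For reference, a minimal candidate for this conversion (editor's sketch, assuming the target provides a [us]mul_widen pattern for the mode pair):

    /* The MULT_EXPR on the widened operands is rewritten to a
       WIDEN_MULT_EXPR on the narrow ones, avoiding two extensions and
       a full 64-bit multiply.  */
    long long
    mul32x32 (int a, int b)
    {
      return (long long) a * b;
    }
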
1394 3334
1395 /* Process a single gimple statement STMT, which is found at the 3335 /* Process a single gimple statement STMT, which is found at the
1396 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its 3336 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
1397 rhs (given by CODE), and try to convert it into a 3337 rhs (given by CODE), and try to convert it into a
1398 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value 3338 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
1399 is true iff we converted the statement. */ 3339 is true iff we converted the statement. */
1400 3340
1401 static bool 3341 static bool
1402 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, 3342 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
1403 enum tree_code code) 3343 enum tree_code code)
1404 { 3344 {
1405 gimple rhs1_stmt = NULL, rhs2_stmt = NULL; 3345 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
1406 tree type, type1, type2; 3346 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
3347 tree type, type1, type2, optype;
1407 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; 3348 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
1408 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; 3349 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
1409 optab this_optab; 3350 optab this_optab;
1410 enum tree_code wmult_code; 3351 enum tree_code wmult_code;
3352 enum insn_code handler;
3353 scalar_mode to_mode, from_mode;
3354 machine_mode actual_mode;
3355 location_t loc = gimple_location (stmt);
3356 int actual_precision;
3357 bool from_unsigned1, from_unsigned2;
1411 3358
1412 lhs = gimple_assign_lhs (stmt); 3359 lhs = gimple_assign_lhs (stmt);
1413 type = TREE_TYPE (lhs); 3360 type = TREE_TYPE (lhs);
1414 if (TREE_CODE (type) != INTEGER_TYPE 3361 if (TREE_CODE (type) != INTEGER_TYPE
1415 && TREE_CODE (type) != FIXED_POINT_TYPE) 3362 && TREE_CODE (type) != FIXED_POINT_TYPE)
1427 { 3374 {
1428 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); 3375 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1429 if (is_gimple_assign (rhs1_stmt)) 3376 if (is_gimple_assign (rhs1_stmt))
1430 rhs1_code = gimple_assign_rhs_code (rhs1_stmt); 3377 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
1431 } 3378 }
1432 else
1433 return false;
1434 3379
1435 if (TREE_CODE (rhs2) == SSA_NAME) 3380 if (TREE_CODE (rhs2) == SSA_NAME)
1436 { 3381 {
1437 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); 3382 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1438 if (is_gimple_assign (rhs2_stmt)) 3383 if (is_gimple_assign (rhs2_stmt))
1439 rhs2_code = gimple_assign_rhs_code (rhs2_stmt); 3384 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
1440 } 3385 }
3386
3387 /* Allow for one conversion statement between the multiply
3388 and addition/subtraction statement. If there are multiple
3389 conversions then we assume they would invalidate this
3390 transformation. If that's not the case then they should have
3391 been folded before now. */
3392 if (CONVERT_EXPR_CODE_P (rhs1_code))
3393 {
3394 conv1_stmt = rhs1_stmt;
3395 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3396 if (TREE_CODE (rhs1) == SSA_NAME)
3397 {
3398 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3399 if (is_gimple_assign (rhs1_stmt))
3400 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3401 }
3402 else
3403 return false;
3404 }
3405 if (CONVERT_EXPR_CODE_P (rhs2_code))
3406 {
3407 conv2_stmt = rhs2_stmt;
3408 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3409 if (TREE_CODE (rhs2) == SSA_NAME)
3410 {
3411 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3412 if (is_gimple_assign (rhs2_stmt))
3413 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3414 }
3415 else
3416 return false;
3417 }
3418
3419 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3420 is_widening_mult_p, but we still need the returned rhs values.
3421
3422 It might also appear that it would be sufficient to use the existing
3423 operands of the widening multiply, but that would limit the choice of
3424 multiply-and-accumulate instructions.
3425
3426 If the widened-multiplication result has more than one use, it is
3427 probably wiser not to do the conversion. */
3428 if (code == PLUS_EXPR
3429 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3430 {
3431 if (!has_single_use (rhs1)
3432 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3433 &type2, &mult_rhs2))
3434 return false;
3435 add_rhs = rhs2;
3436 conv_stmt = conv1_stmt;
3437 }
3438 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3439 {
3440 if (!has_single_use (rhs2)
3441 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3442 &type2, &mult_rhs2))
3443 return false;
3444 add_rhs = rhs1;
3445 conv_stmt = conv2_stmt;
3446 }
1441 else 3447 else
1442 return false; 3448 return false;
1443 3449
1444 if (code == PLUS_EXPR && rhs1_code == MULT_EXPR) 3450 to_mode = SCALAR_TYPE_MODE (type);
1445 { 3451 from_mode = SCALAR_TYPE_MODE (type1);
1446 if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, 3452 from_unsigned1 = TYPE_UNSIGNED (type1);
1447 &type2, &mult_rhs2)) 3453 from_unsigned2 = TYPE_UNSIGNED (type2);
3454 optype = type1;
3455
3456 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3457 if (from_unsigned1 != from_unsigned2)
3458 {
3459 if (!INTEGRAL_TYPE_P (type))
1448 return false; 3460 return false;
1449 add_rhs = rhs2; 3461 /* We can use a signed multiply with unsigned types as long as
1450 } 3462 there is a wider mode to use, or it is the smaller of the two
1451 else if (rhs2_code == MULT_EXPR) 3463 types that is unsigned. Note that type1 >= type2, always. */
1452 { 3464 if ((from_unsigned1
1453 if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, 3465 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
1454 &type2, &mult_rhs2)) 3466 || (from_unsigned2
1455 return false; 3467 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
1456 add_rhs = rhs1; 3468 {
1457 } 3469 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
1458 else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR) 3470 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
1459 { 3471 return false;
1460 mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt); 3472 }
1461 mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt); 3473
1462 type1 = TREE_TYPE (mult_rhs1); 3474 from_unsigned1 = from_unsigned2 = false;
1463 type2 = TREE_TYPE (mult_rhs2); 3475 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
1464 add_rhs = rhs2; 3476 false);
1465 } 3477 }
1466 else if (rhs2_code == WIDEN_MULT_EXPR) 3478
1467 { 3479 /* If there was a conversion between the multiply and addition
1468 mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt); 3480 then we need to make sure it fits a multiply-and-accumulate.
1469 mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt); 3481 There should be a single mode change which does not change the
1470 type1 = TREE_TYPE (mult_rhs1); 3482 value. */
1471 type2 = TREE_TYPE (mult_rhs2); 3483 if (conv_stmt)
1472 add_rhs = rhs1; 3484 {
1473 } 3485 /* We use the original, unmodified data types for this. */
1474 else 3486 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
1475 return false; 3487 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
1476 3488 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
1477 if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) 3489 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
1478 return false; 3490
3491 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3492 {
3493 /* Conversion is a truncate. */
3494 if (TYPE_PRECISION (to_type) < data_size)
3495 return false;
3496 }
3497 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3498 {
3499 /* Conversion is an extend. Check it's the right sort. */
3500 if (TYPE_UNSIGNED (from_type) != is_unsigned
3501 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3502 return false;
3503 }
3504 /* else convert is a no-op for our purposes. */
3505 }
1479 3506
1480 /* Verify that the machine can perform a widening multiply 3507 /* Verify that the machine can perform a widening multiply
1481 accumulate in this mode/signedness combination, otherwise 3508 accumulate in this mode/signedness combination, otherwise
1482 this transformation is likely to pessimize code. */ 3509 this transformation is likely to pessimize code. */
1483 this_optab = optab_for_tree_code (wmult_code, type1, optab_default); 3510 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
1484 if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing) 3511 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3512 from_mode, 0, &actual_mode);
3513
3514 if (handler == CODE_FOR_nothing)
1485 return false; 3515 return false;
1486 3516
1487 /* ??? May need some type verification here? */ 3517 /* Ensure that the inputs to the handler are in the correct precision
1488 3518 for the opcode. This will be the full mode size. */
1489 gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, 3519 actual_precision = GET_MODE_PRECISION (actual_mode);
1490 fold_convert (type1, mult_rhs1), 3520 if (actual_precision != TYPE_PRECISION (type1)
1491 fold_convert (type2, mult_rhs2), 3521 || from_unsigned1 != TYPE_UNSIGNED (type1))
1492 add_rhs); 3522 mult_rhs1 = build_and_insert_cast (gsi, loc,
3523 build_nonstandard_integer_type
3524 (actual_precision, from_unsigned1),
3525 mult_rhs1);
3526 if (actual_precision != TYPE_PRECISION (type2)
3527 || from_unsigned2 != TYPE_UNSIGNED (type2))
3528 mult_rhs2 = build_and_insert_cast (gsi, loc,
3529 build_nonstandard_integer_type
3530 (actual_precision, from_unsigned2),
3531 mult_rhs2);
3532
3533 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3534 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3535
3536 /* Handle constants. */
3537 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3538 mult_rhs1 = fold_convert (type1, mult_rhs1);
3539 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3540 mult_rhs2 = fold_convert (type2, mult_rhs2);
3541
3542 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3543 add_rhs);
1493 update_stmt (gsi_stmt (*gsi)); 3544 update_stmt (gsi_stmt (*gsi));
3545 widen_mul_stats.maccs_inserted++;
1494 return true; 3546 return true;
1495 } 3547 }
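
A hedged sketch of a statement pair this function targets; on targets with a widening multiply-accumulate pattern (for example 32-bit ARM's smlal) the PLUS_EXPR becomes a WIDEN_MULT_PLUS_EXPR:

    /* Editor's example: the widening multiply feeding the addition has
       a single use, so the pair can fuse into one macc.  */
    long long
    macc (long long acc, int a, int b)
    {
      return acc + (long long) a * b;
    }
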
1496 3548
1497 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 3549 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
1498 with uses in additions and subtractions to form fused multiply-add 3550 with uses in additions and subtractions to form fused multiply-add
1499 operations. Returns true if successful and MUL_STMT should be removed. */ 3551 operations. Returns true if successful and MUL_STMT should be removed. */
1500 3552
1501 static bool 3553 static bool
1502 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2) 3554 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
1503 { 3555 {
1504 tree mul_result = gimple_get_lhs (mul_stmt); 3556 tree mul_result = gimple_get_lhs (mul_stmt);
1505 tree type = TREE_TYPE (mul_result); 3557 tree type = TREE_TYPE (mul_result);
1506 gimple use_stmt, neguse_stmt, fma_stmt; 3558 gimple *use_stmt, *neguse_stmt;
3559 gassign *fma_stmt;
1507 use_operand_p use_p; 3560 use_operand_p use_p;
1508 imm_use_iterator imm_iter; 3561 imm_use_iterator imm_iter;
1509 3562
1510 if (FLOAT_TYPE_P (type) 3563 if (FLOAT_TYPE_P (type)
1511 && flag_fp_contract_mode == FP_CONTRACT_OFF) 3564 && flag_fp_contract_mode == FP_CONTRACT_OFF)
1512 return false; 3565 return false;
1513 3566
1514 /* We don't want to do bitfield reduction ops. */ 3567 /* We don't want to do bitfield reduction ops. */
1515 if (INTEGRAL_TYPE_P (type) 3568 if (INTEGRAL_TYPE_P (type)
1516 && (TYPE_PRECISION (type) 3569 && !type_has_mode_precision_p (type))
1517 != GET_MODE_PRECISION (TYPE_MODE (type))))
1518 return false; 3570 return false;
1519 3571
1520 /* If the target doesn't support it, don't generate it. We assume that 3572 /* If the target doesn't support it, don't generate it. We assume that
1521 if fma isn't available then fms, fnma or fnms are not either. */ 3573 if fma isn't available then fms, fnma or fnms are not either. */
1522 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) 3574 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3575 return false;
3576
3577 /* If the multiplication has zero uses, it is probably kept around
3578 because of -fnon-call-exceptions. Don't optimize it away in that
3579 case; it is DCE's job. */
3580 if (has_zero_uses (mul_result))
1523 return false; 3581 return false;
1524 3582
1525 /* Make sure that the multiplication statement becomes dead after 3583 /* Make sure that the multiplication statement becomes dead after
1526 the transformation, thus that all uses are transformed to FMAs. 3584 the transformation, thus that all uses are transformed to FMAs.
1527 This means we assume that an FMA operation has the same cost 3585 This means we assume that an FMA operation has the same cost
1556 3614
1557 /* A negate on the multiplication leads to FNMA. */ 3615 /* A negate on the multiplication leads to FNMA. */
1558 if (use_code == NEGATE_EXPR) 3616 if (use_code == NEGATE_EXPR)
1559 { 3617 {
1560 ssa_op_iter iter; 3618 ssa_op_iter iter;
1561 tree use; 3619 use_operand_p usep;
1562 3620
1563 result = gimple_assign_lhs (use_stmt); 3621 result = gimple_assign_lhs (use_stmt);
1564 3622
1565 /* Make sure the negate statement becomes dead with this 3623 /* Make sure the negate statement becomes dead with this
1566 single transformation. */ 3624 single transformation. */
1567 if (!single_imm_use (gimple_assign_lhs (use_stmt), 3625 if (!single_imm_use (gimple_assign_lhs (use_stmt),
1568 &use_p, &neguse_stmt)) 3626 &use_p, &neguse_stmt))
1569 return false; 3627 return false;
1570 3628
1571 /* Make sure the multiplication isn't also used on that stmt. */ 3629 /* Make sure the multiplication isn't also used on that stmt. */
1572 FOR_EACH_SSA_TREE_OPERAND (use, neguse_stmt, iter, SSA_OP_USE) 3630 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
1573 if (use == mul_result) 3631 if (USE_FROM_PTR (usep) == mul_result)
1574 return false; 3632 return false;
1575 3633
1576 /* Re-validate. */ 3634 /* Re-validate. */
1577 use_stmt = neguse_stmt; 3635 use_stmt = neguse_stmt;
1578 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) 3636 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
1595 default: 3653 default:
1596 /* FMA can only be formed from PLUS and MINUS. */ 3654 /* FMA can only be formed from PLUS and MINUS. */
1597 return false; 3655 return false;
1598 } 3656 }
1599 3657
3658 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3659 by a MULT_EXPR that we'll visit later, we might be able to
3660 get a more profitable match with fnma.
3661 OTOH, if we don't, a negate / fma pair likely has lower latency
3662 than a mult / subtract pair. */
3663 if (use_code == MINUS_EXPR && !negate_p
3664 && gimple_assign_rhs1 (use_stmt) == result
3665 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3666 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3667 {
3668 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3669
3670 if (TREE_CODE (rhs2) == SSA_NAME)
3671 {
3672 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
3673 if (has_single_use (rhs2)
3674 && is_gimple_assign (stmt2)
3675 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3676 return false;
3677 }
3678 }
3679
1600 /* We can't handle a * b + a * b. */ 3680 /* We can't handle a * b + a * b. */
1601 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) 3681 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
1602 return false; 3682 return false;
1603 3683
1604 /* While it is possible to validate whether or not the exact form 3684 /* While it is possible to validate whether or not the exact form
1606 is that the transformation is never a loss. For instance, suppose 3686 is that the transformation is never a loss. For instance, suppose
1607 the target only has the plain FMA pattern available. Consider 3687 the target only has the plain FMA pattern available. Consider
1608 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which 3688 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
1609 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we 3689 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
1610 still have 3 operations, but in the FMA form the two NEGs are 3690 still have 3 operations, but in the FMA form the two NEGs are
1611 independant and could be run in parallel. */ 3691 independent and could be run in parallel. */
1612 } 3692 }
1613 3693
1614 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) 3694 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
1615 { 3695 {
1616 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); 3696 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1659 build1 (NEGATE_EXPR, 3739 build1 (NEGATE_EXPR,
1660 type, mulop1), 3740 type, mulop1),
1661 true, NULL_TREE, true, 3741 true, NULL_TREE, true,
1662 GSI_SAME_STMT); 3742 GSI_SAME_STMT);
1663 3743
1664 fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR, 3744 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
1665 gimple_assign_lhs (use_stmt), 3745 FMA_EXPR, mulop1, op2, addop);
1666 mulop1, op2,
1667 addop);
1668 gsi_replace (&gsi, fma_stmt, true); 3746 gsi_replace (&gsi, fma_stmt, true);
3747 widen_mul_stats.fmas_inserted++;
1669 } 3748 }
1670 3749
1671 return true; 3750 return true;
1672 } 3751 }
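
A minimal FMA candidate (editor's sketch; it requires the fma optab and a contraction mode other than FP_CONTRACT_OFF, e.g. -ffp-contract=fast):

    /* a * b + c fuses into FMA_EXPR; a * b - c maps to FMS, and the
       negated variants to FNMA/FNMS, per the use analysis above.  */
    double
    fma_cand (double a, double b, double c)
    {
      return a * b + c;
    }
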
3752
3753
3754 /* Helper function of match_uaddsub_overflow. Return 1
3755 if USE_STMT is the unsigned overflow check ovf != 0 for
3756 STMT, -1 if USE_STMT is the unsigned overflow check ovf == 0,
3757 and 0 otherwise. */
3758
3759 static int
3760 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3761 {
3762 enum tree_code ccode = ERROR_MARK;
3763 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3764 if (gimple_code (use_stmt) == GIMPLE_COND)
3765 {
3766 ccode = gimple_cond_code (use_stmt);
3767 crhs1 = gimple_cond_lhs (use_stmt);
3768 crhs2 = gimple_cond_rhs (use_stmt);
3769 }
3770 else if (is_gimple_assign (use_stmt))
3771 {
3772 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3773 {
3774 ccode = gimple_assign_rhs_code (use_stmt);
3775 crhs1 = gimple_assign_rhs1 (use_stmt);
3776 crhs2 = gimple_assign_rhs2 (use_stmt);
3777 }
3778 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3779 {
3780 tree cond = gimple_assign_rhs1 (use_stmt);
3781 if (COMPARISON_CLASS_P (cond))
3782 {
3783 ccode = TREE_CODE (cond);
3784 crhs1 = TREE_OPERAND (cond, 0);
3785 crhs2 = TREE_OPERAND (cond, 1);
3786 }
3787 else
3788 return 0;
3789 }
3790 else
3791 return 0;
3792 }
3793 else
3794 return 0;
3795
3796 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3797 return 0;
3798
3799 enum tree_code code = gimple_assign_rhs_code (stmt);
3800 tree lhs = gimple_assign_lhs (stmt);
3801 tree rhs1 = gimple_assign_rhs1 (stmt);
3802 tree rhs2 = gimple_assign_rhs2 (stmt);
3803
3804 switch (ccode)
3805 {
3806 case GT_EXPR:
3807 case LE_EXPR:
3808 /* r = a - b; r > a or r <= a
3809 r = a + b; a > r or a <= r or b > r or b <= r. */
3810 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3811 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3812 && crhs2 == lhs))
3813 return ccode == GT_EXPR ? 1 : -1;
3814 break;
3815 case LT_EXPR:
3816 case GE_EXPR:
3817 /* r = a - b; a < r or a >= r
3818 r = a + b; r < a or r >= a or r < b or r >= b. */
3819 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3820 || (code == PLUS_EXPR && crhs1 == lhs
3821 && (crhs2 == rhs1 || crhs2 == rhs2)))
3822 return ccode == LT_EXPR ? 1 : -1;
3823 break;
3824 default:
3825 break;
3826 }
3827 return 0;
3828 }
3829
3830 /* Recognize for unsigned x
3831 x = y - z;
3832 if (x > y)
3833 where there are other uses of x and replace it with
3834 _7 = SUB_OVERFLOW (y, z);
3835 x = REALPART_EXPR <_7>;
3836 _8 = IMAGPART_EXPR <_7>;
3837 if (_8)
3838 and similarly for addition. */
3839
3840 static bool
3841 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3842 enum tree_code code)
3843 {
3844 tree lhs = gimple_assign_lhs (stmt);
3845 tree type = TREE_TYPE (lhs);
3846 use_operand_p use_p;
3847 imm_use_iterator iter;
3848 bool use_seen = false;
3849 bool ovf_use_seen = false;
3850 gimple *use_stmt;
3851
3852 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3853 if (!INTEGRAL_TYPE_P (type)
3854 || !TYPE_UNSIGNED (type)
3855 || has_zero_uses (lhs)
3856 || has_single_use (lhs)
3857 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3858 TYPE_MODE (type)) == CODE_FOR_nothing)
3859 return false;
3860
3861 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3862 {
3863 use_stmt = USE_STMT (use_p);
3864 if (is_gimple_debug (use_stmt))
3865 continue;
3866
3867 if (uaddsub_overflow_check_p (stmt, use_stmt))
3868 ovf_use_seen = true;
3869 else
3870 use_seen = true;
3871 if (ovf_use_seen && use_seen)
3872 break;
3873 }
3874
3875 if (!ovf_use_seen || !use_seen)
3876 return false;
3877
3878 tree ctype = build_complex_type (type);
3879 tree rhs1 = gimple_assign_rhs1 (stmt);
3880 tree rhs2 = gimple_assign_rhs2 (stmt);
3881 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3882 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3883 2, rhs1, rhs2);
3884 tree ctmp = make_ssa_name (ctype);
3885 gimple_call_set_lhs (g, ctmp);
3886 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3887 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3888 build1 (REALPART_EXPR, type, ctmp));
3889 gsi_replace (gsi, g2, true);
3890 tree ovf = make_ssa_name (type);
3891 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3892 build1 (IMAGPART_EXPR, type, ctmp));
3893 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3894
3895 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3896 {
3897 if (is_gimple_debug (use_stmt))
3898 continue;
3899
3900 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3901 if (ovf_use == 0)
3902 continue;
3903 if (gimple_code (use_stmt) == GIMPLE_COND)
3904 {
3905 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3906 gimple_cond_set_lhs (cond_stmt, ovf);
3907 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3908 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3909 }
3910 else
3911 {
3912 gcc_checking_assert (is_gimple_assign (use_stmt));
3913 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3914 {
3915 gimple_assign_set_rhs1 (use_stmt, ovf);
3916 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3917 gimple_assign_set_rhs_code (use_stmt,
3918 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3919 }
3920 else
3921 {
3922 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3923 == COND_EXPR);
3924 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3925 boolean_type_node, ovf,
3926 build_int_cst (type, 0));
3927 gimple_assign_set_rhs1 (use_stmt, cond);
3928 }
3929 }
3930 update_stmt (use_stmt);
3931 }
3932 return true;
3933 }
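
A C-level view of the idiom being matched (editor's sketch; note the matcher requires the subtraction result to have both an overflow-check use and at least one other use, and the target must provide the uaddv4/usubv4 pattern):

    unsigned
    usub (unsigned y, unsigned z, unsigned *res)
    {
      unsigned x = y - z;
      *res = x;       /* extra use of x, as required above */
      return x > y;   /* rewritten to test IMAGPART of SUB_OVERFLOW */
    }
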
3934
3935 /* Return true if target has support for divmod. */
3936
3937 static bool
3938 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3939 {
3940 /* If target supports hardware divmod insn, use it for divmod. */
3941 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3942 return true;
3943
3944 /* Check if libfunc for divmod is available. */
3945 rtx libfunc = optab_libfunc (divmod_optab, mode);
3946 if (libfunc != NULL_RTX)
3947 {
3948 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3949 we don't want to use the libfunc even if it exists for the given mode. */
3950 machine_mode div_mode;
3951 FOR_EACH_MODE_FROM (div_mode, mode)
3952 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3953 return false;
3954
3955 return targetm.expand_divmod_libfunc != NULL;
3956 }
3957
3958 return false;
3959 }
3960
3961 /* Check if stmt is candidate for divmod transform. */
3962
3963 static bool
3964 divmod_candidate_p (gassign *stmt)
3965 {
3966 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3967 machine_mode mode = TYPE_MODE (type);
3968 optab divmod_optab, div_optab;
3969
3970 if (TYPE_UNSIGNED (type))
3971 {
3972 divmod_optab = udivmod_optab;
3973 div_optab = udiv_optab;
3974 }
3975 else
3976 {
3977 divmod_optab = sdivmod_optab;
3978 div_optab = sdiv_optab;
3979 }
3980
3981 tree op1 = gimple_assign_rhs1 (stmt);
3982 tree op2 = gimple_assign_rhs2 (stmt);
3983
3984 /* Disable the transform if either is a constant, since division-by-constant
3985 may have specialized expansion. */
3986 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3987 return false;
3988
3989 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3990 expand using the [su]divv optabs. */
3991 if (TYPE_OVERFLOW_TRAPS (type))
3992 return false;
3993
3994 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3995 return false;
3996
3997 return true;
3998 }
3999
4000 /* This function looks for:
4001 t1 = a TRUNC_DIV_EXPR b;
4002 t2 = a TRUNC_MOD_EXPR b;
4003 and transforms it to the following sequence:
4004 complex_tmp = DIVMOD (a, b);
4005 t1 = REALPART_EXPR (complex_tmp);
4006 t2 = IMAGPART_EXPR (complex_tmp);
4007 For conditions enabling the transform see divmod_candidate_p().
4008
4009 The pass has three parts:
4010 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4011 other trunc_div_expr and trunc_mod_expr stmts.
4012 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4013 to stmts vector.
4014 3) Insert DIVMOD call just before top_stmt and update entries in
4015 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4016 IMAGPART_EXPR for mod). */
4017
4018 static bool
4019 convert_to_divmod (gassign *stmt)
4020 {
4021 if (stmt_can_throw_internal (stmt)
4022 || !divmod_candidate_p (stmt))
4023 return false;
4024
4025 tree op1 = gimple_assign_rhs1 (stmt);
4026 tree op2 = gimple_assign_rhs2 (stmt);
4027
4028 imm_use_iterator use_iter;
4029 gimple *use_stmt;
4030 auto_vec<gimple *> stmts;
4031
4032 gimple *top_stmt = stmt;
4033 basic_block top_bb = gimple_bb (stmt);
4034
4035 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4036 at least stmt and possibly other trunc_div/trunc_mod stmts
4037 having same operands as stmt. */
4038
4039 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4040 {
4041 if (is_gimple_assign (use_stmt)
4042 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4043 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4044 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4045 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4046 {
4047 if (stmt_can_throw_internal (use_stmt))
4048 continue;
4049
4050 basic_block bb = gimple_bb (use_stmt);
4051
4052 if (bb == top_bb)
4053 {
4054 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4055 top_stmt = use_stmt;
4056 }
4057 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4058 {
4059 top_bb = bb;
4060 top_stmt = use_stmt;
4061 }
4062 }
4063 }
4064
4065 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4066 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4067
4068 stmts.safe_push (top_stmt);
4069 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4070
4071 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4072 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4073 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4074 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4075
4076 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4077 {
4078 if (is_gimple_assign (use_stmt)
4079 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4080 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4081 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4082 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4083 {
4084 if (use_stmt == top_stmt
4085 || stmt_can_throw_internal (use_stmt)
4086 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4087 continue;
4088
4089 stmts.safe_push (use_stmt);
4090 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4091 div_seen = true;
4092 }
4093 }
4094
4095 if (!div_seen)
4096 return false;
4097
4098 /* Part 3: Create libcall to internal fn DIVMOD:
4099 divmod_tmp = DIVMOD (op1, op2). */
4100
4101 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4102 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4103 call_stmt, "divmod_tmp");
4104 gimple_call_set_lhs (call_stmt, res);
4105 /* We rejected throwing statements above. */
4106 gimple_call_set_nothrow (call_stmt, true);
4107
4108 /* Insert the call before top_stmt. */
4109 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4110 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4111
4112 widen_mul_stats.divmod_calls_inserted++;
4113
4114 /* Update all statements in stmts vector:
4115 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4116 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4117
4118 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4119 {
4120 tree new_rhs;
4121
4122 switch (gimple_assign_rhs_code (use_stmt))
4123 {
4124 case TRUNC_DIV_EXPR:
4125 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4126 break;
4127
4128 case TRUNC_MOD_EXPR:
4129 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4130 break;
4131
4132 default:
4133 gcc_unreachable ();
4134 }
4135
4136 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4137 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4138 update_stmt (use_stmt);
4139 }
4140
4141 return true;
4142 }
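
A minimal divmod candidate (editor's sketch; it assumes non-constant operands and a target for which target_supports_divmod_p holds):

    /* Both statements share operands, so they collapse into one
       DIVMOD internal call plus REALPART_EXPR/IMAGPART_EXPR reads.  */
    void
    qr (int a, int b, int *q, int *r)
    {
      *q = a / b;
      *r = a % b;
    }
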
1673 4143
1674 /* Find integer multiplications where the operands are extended from 4144 /* Find integer multiplications where the operands are extended from
1675 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR 4145 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
1676 where appropriate. */ 4146 where appropriate. */
1677 4147
1678 static unsigned int 4148 namespace {
1679 execute_optimize_widening_mul (void) 4149
4150 const pass_data pass_data_optimize_widening_mul =
4151 {
4152 GIMPLE_PASS, /* type */
4153 "widening_mul", /* name */
4154 OPTGROUP_NONE, /* optinfo_flags */
4155 TV_NONE, /* tv_id */
4156 PROP_ssa, /* properties_required */
4157 0, /* properties_provided */
4158 0, /* properties_destroyed */
4159 0, /* todo_flags_start */
4160 TODO_update_ssa, /* todo_flags_finish */
4161 };
4162
4163 class pass_optimize_widening_mul : public gimple_opt_pass
4164 {
4165 public:
4166 pass_optimize_widening_mul (gcc::context *ctxt)
4167 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4168 {}
4169
4170 /* opt_pass methods: */
4171 virtual bool gate (function *)
4172 {
4173 return flag_expensive_optimizations && optimize;
4174 }
4175
4176 virtual unsigned int execute (function *);
4177
4178 }; // class pass_optimize_widening_mul
4179
4180 unsigned int
4181 pass_optimize_widening_mul::execute (function *fun)
1680 { 4182 {
1681 basic_block bb; 4183 basic_block bb;
1682 bool cfg_changed = false; 4184 bool cfg_changed = false;
1683 4185
1684 FOR_EACH_BB (bb) 4186 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4187 calculate_dominance_info (CDI_DOMINATORS);
4188 renumber_gimple_stmt_uids ();
4189
4190 FOR_EACH_BB_FN (bb, fun)
1685 { 4191 {
1686 gimple_stmt_iterator gsi; 4192 gimple_stmt_iterator gsi;
1687 4193
1688 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) 4194 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
1689 { 4195 {
1690 gimple stmt = gsi_stmt (gsi); 4196 gimple *stmt = gsi_stmt (gsi);
1691 enum tree_code code; 4197 enum tree_code code;
1692 4198
1693 if (is_gimple_assign (stmt)) 4199 if (is_gimple_assign (stmt))
1694 { 4200 {
1695 code = gimple_assign_rhs_code (stmt); 4201 code = gimple_assign_rhs_code (stmt);
1696 switch (code) 4202 switch (code)
1697 { 4203 {
1698 case MULT_EXPR: 4204 case MULT_EXPR:
1699 if (!convert_mult_to_widen (stmt) 4205 if (!convert_mult_to_widen (stmt, &gsi)
4206 && !convert_expand_mult_copysign (stmt, &gsi)
1700 && convert_mult_to_fma (stmt, 4207 && convert_mult_to_fma (stmt,
1701 gimple_assign_rhs1 (stmt), 4208 gimple_assign_rhs1 (stmt),
1702 gimple_assign_rhs2 (stmt))) 4209 gimple_assign_rhs2 (stmt)))
1703 { 4210 {
1704 gsi_remove (&gsi, true); 4211 gsi_remove (&gsi, true);
1707 } 4214 }
1708 break; 4215 break;
1709 4216
1710 case PLUS_EXPR: 4217 case PLUS_EXPR:
1711 case MINUS_EXPR: 4218 case MINUS_EXPR:
1712 convert_plusminus_to_widen (&gsi, stmt, code); 4219 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4220 match_uaddsub_overflow (&gsi, stmt, code);
4221 break;
4222
4223 case TRUNC_MOD_EXPR:
4224 convert_to_divmod (as_a<gassign *> (stmt));
1713 break; 4225 break;
1714 4226
1715 default:; 4227 default:;
1716 } 4228 }
1717 } 4229 }
1718 else if (is_gimple_call (stmt) 4230 else if (is_gimple_call (stmt)
1719 && gimple_call_lhs (stmt)) 4231 && gimple_call_lhs (stmt))
1720 { 4232 {
1721 tree fndecl = gimple_call_fndecl (stmt); 4233 tree fndecl = gimple_call_fndecl (stmt);
1722 if (fndecl 4234 if (fndecl
1723 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) 4235 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1724 { 4236 {
1725 switch (DECL_FUNCTION_CODE (fndecl)) 4237 switch (DECL_FUNCTION_CODE (fndecl))
1726 { 4238 {
1727 case BUILT_IN_POWF: 4239 case BUILT_IN_POWF:
1728 case BUILT_IN_POW: 4240 case BUILT_IN_POW:
1729 case BUILT_IN_POWL: 4241 case BUILT_IN_POWL:
1730 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST 4242 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
1731 && REAL_VALUES_EQUAL 4243 && real_equal
1732 (TREE_REAL_CST (gimple_call_arg (stmt, 1)), 4244 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
1733 dconst2) 4245 &dconst2)
1734 && convert_mult_to_fma (stmt, 4246 && convert_mult_to_fma (stmt,
1735 gimple_call_arg (stmt, 0), 4247 gimple_call_arg (stmt, 0),
1736 gimple_call_arg (stmt, 0))) 4248 gimple_call_arg (stmt, 0)))
1737 { 4249 {
1738 unlink_stmt_vdef (stmt); 4250 unlink_stmt_vdef (stmt);
1739 gsi_remove (&gsi, true); 4251 if (gsi_remove (&gsi, true)
4252 && gimple_purge_dead_eh_edges (bb))
4253 cfg_changed = true;
1740 release_defs (stmt); 4254 release_defs (stmt);
1741 if (gimple_purge_dead_eh_edges (bb))
1742 cfg_changed = true;
1743 continue; 4255 continue;
1744 } 4256 }
1745 break; 4257 break;
1746 4258
1747 default:; 4259 default:;
1750 } 4262 }
1751 gsi_next (&gsi); 4263 gsi_next (&gsi);
1752 } 4264 }
1753 } 4265 }
1754 4266
4267 statistics_counter_event (fun, "widening multiplications inserted",
4268 widen_mul_stats.widen_mults_inserted);
4269 statistics_counter_event (fun, "widening maccs inserted",
4270 widen_mul_stats.maccs_inserted);
4271 statistics_counter_event (fun, "fused multiply-adds inserted",
4272 widen_mul_stats.fmas_inserted);
4273 statistics_counter_event (fun, "divmod calls inserted",
4274 widen_mul_stats.divmod_calls_inserted);
4275
1755 return cfg_changed ? TODO_cleanup_cfg : 0; 4276 return cfg_changed ? TODO_cleanup_cfg : 0;
1756 } 4277 }
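
To inspect the pass in action, the usual per-pass dump flag should apply (a hedged note: the pass name registered above is "widening_mul", so the conventional -fdump-tree-<name> spelling is assumed):

    gcc -O2 -fdump-tree-widening_mul test.c
    # the resulting dump file shows WIDEN_MULT_EXPR, FMA_EXPR,
    # ADD/SUB_OVERFLOW and DIVMOD statements after this pass
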
1757 4278
1758 static bool 4279 } // anon namespace
1759 gate_optimize_widening_mul (void) 4280
1760 { 4281 gimple_opt_pass *
1761 return flag_expensive_optimizations && optimize; 4282 make_pass_optimize_widening_mul (gcc::context *ctxt)
1762 } 4283 {
1763 4284 return new pass_optimize_widening_mul (ctxt);
1764 struct gimple_opt_pass pass_optimize_widening_mul = 4285 }
1765 {
1766 {
1767 GIMPLE_PASS,
1768 "widening_mul", /* name */
1769 gate_optimize_widening_mul, /* gate */
1770 execute_optimize_widening_mul, /* execute */
1771 NULL, /* sub */
1772 NULL, /* next */
1773 0, /* static_pass_number */
1774 TV_NONE, /* tv_id */
1775 PROP_ssa, /* properties_required */
1776 0, /* properties_provided */
1777 0, /* properties_destroyed */
1778 0, /* todo_flags_start */
1779 TODO_verify_ssa
1780 | TODO_verify_stmts
1781 | TODO_dump_func
1782 | TODO_update_ssa /* todo_flags_finish */
1783 }
1784 };