111
|
1 /* Scheduler hooks for IA-32 which implement CPU specific logic.
|
|
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify
|
|
7 it under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation; either version 3, or (at your option)
|
|
9 any later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful,
|
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 GNU General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #include "config.h"
|
|
21 #include "system.h"
|
|
22 #include "coretypes.h"
|
|
23 #include "backend.h"
|
|
24 #include "rtl.h"
|
|
25 #include "tree.h"
|
|
26 #include "cfghooks.h"
|
|
27 #include "tm_p.h"
|
|
28 #include "insn-config.h"
|
|
29 #include "insn-attr.h"
|
|
30 #include "recog.h"
|
|
31 #include "target.h"
|
|
32
|
|
33 /* Return the maximum number of instructions a cpu can issue. */
|
|
34
|
|
35 int
|
|
36 ix86_issue_rate (void)
|
|
37 {
|
|
38 switch (ix86_tune)
|
|
39 {
|
|
40 case PROCESSOR_PENTIUM:
|
|
41 case PROCESSOR_LAKEMONT:
|
|
42 case PROCESSOR_BONNELL:
|
|
43 case PROCESSOR_SILVERMONT:
|
|
44 case PROCESSOR_KNL:
|
|
45 case PROCESSOR_KNM:
|
|
46 case PROCESSOR_INTEL:
|
|
47 case PROCESSOR_K6:
|
|
48 case PROCESSOR_BTVER2:
|
|
49 case PROCESSOR_PENTIUM4:
|
|
50 case PROCESSOR_NOCONA:
|
|
51 return 2;
|
|
52
|
|
53 case PROCESSOR_PENTIUMPRO:
|
|
54 case PROCESSOR_ATHLON:
|
|
55 case PROCESSOR_K8:
|
|
56 case PROCESSOR_AMDFAM10:
|
|
57 case PROCESSOR_GENERIC:
|
|
58 case PROCESSOR_BTVER1:
|
|
59 return 3;
|
|
60
|
|
61 case PROCESSOR_BDVER1:
|
|
62 case PROCESSOR_BDVER2:
|
|
63 case PROCESSOR_BDVER3:
|
|
64 case PROCESSOR_BDVER4:
|
|
65 case PROCESSOR_ZNVER1:
|
|
66 case PROCESSOR_CORE2:
|
|
67 case PROCESSOR_NEHALEM:
|
|
68 case PROCESSOR_SANDYBRIDGE:
|
|
69 case PROCESSOR_HASWELL:
|
|
70 return 4;
|
|
71
|
|
72 default:
|
|
73 return 1;
|
|
74 }
|
|
75 }
|
|
76
|
|
77 /* Return true iff USE_INSN has a memory address with operands set by
|
|
78 SET_INSN. */
|
|
79
|
|
80 bool
|
|
81 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
|
|
82 {
|
|
83 int i;
|
|
84 extract_insn_cached (use_insn);
|
|
85 for (i = recog_data.n_operands - 1; i >= 0; --i)
|
|
86 if (MEM_P (recog_data.operand[i]))
|
|
87 {
|
|
88 rtx addr = XEXP (recog_data.operand[i], 0);
|
|
89 if (modified_in_p (addr, set_insn) != 0)
|
|
90 {
|
|
91 /* No AGI stall if SET_INSN is a push or pop and USE_INSN
|
|
92 has SP based memory (unless index reg is modified in a pop). */
|
|
93 rtx set = single_set (set_insn);
|
|
94 if (set
|
|
95 && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
|
|
96 || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
|
|
97 {
|
|
98 struct ix86_address parts;
|
|
99 if (ix86_decompose_address (addr, &parts)
|
|
100 && parts.base == stack_pointer_rtx
|
|
101 && (parts.index == NULL_RTX
|
|
102 || MEM_P (SET_DEST (set))
|
|
103 || !modified_in_p (parts.index, set_insn)))
|
|
104 return false;
|
|
105 }
|
|
106 return true;
|
|
107 }
|
|
108 return false;
|
|
109 }
|
|
110 return false;
|
|
111 }
|
|
112
|
|
113 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
|
|
114 by DEP_INSN and nothing set by DEP_INSN. */
|
|
115
|
|
116 static bool
|
|
117 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
|
|
118 {
|
|
119 rtx set, set2;
|
|
120
|
|
121 /* Simplify the test for uninteresting insns. */
|
|
122 if (insn_type != TYPE_SETCC
|
|
123 && insn_type != TYPE_ICMOV
|
|
124 && insn_type != TYPE_FCMOV
|
|
125 && insn_type != TYPE_IBR)
|
|
126 return false;
|
|
127
|
|
128 if ((set = single_set (dep_insn)) != 0)
|
|
129 {
|
|
130 set = SET_DEST (set);
|
|
131 set2 = NULL_RTX;
|
|
132 }
|
|
133 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
|
|
134 && XVECLEN (PATTERN (dep_insn), 0) == 2
|
|
135 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
|
|
136 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
|
|
137 {
|
|
138 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
|
|
139 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
|
|
140 }
|
|
141 else
|
|
142 return false;
|
|
143
|
|
144 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
|
|
145 return false;
|
|
146
|
|
147 /* This test is true if the dependent insn reads the flags but
|
|
148 not any other potentially set register. */
|
|
149 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
|
|
150 return false;
|
|
151
|
|
152 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
|
|
153 return false;
|
|
154
|
|
155 return true;
|
|
156 }
|
|
157
|
|
158 /* Helper function for exact_store_load_dependency.
|
|
159 Return true if addr is found in insn. */
|
|
160 static bool
|
|
161 exact_dependency_1 (rtx addr, rtx insn)
|
|
162 {
|
|
163 enum rtx_code code;
|
|
164 const char *format_ptr;
|
|
165 int i, j;
|
|
166
|
|
167 code = GET_CODE (insn);
|
|
168 switch (code)
|
|
169 {
|
|
170 case MEM:
|
|
171 if (rtx_equal_p (addr, insn))
|
|
172 return true;
|
|
173 break;
|
|
174 case REG:
|
|
175 CASE_CONST_ANY:
|
|
176 case SYMBOL_REF:
|
|
177 case CODE_LABEL:
|
|
178 case PC:
|
|
179 case CC0:
|
|
180 case EXPR_LIST:
|
|
181 return false;
|
|
182 default:
|
|
183 break;
|
|
184 }
|
|
185
|
|
186 format_ptr = GET_RTX_FORMAT (code);
|
|
187 for (i = 0; i < GET_RTX_LENGTH (code); i++)
|
|
188 {
|
|
189 switch (*format_ptr++)
|
|
190 {
|
|
191 case 'e':
|
|
192 if (exact_dependency_1 (addr, XEXP (insn, i)))
|
|
193 return true;
|
|
194 break;
|
|
195 case 'E':
|
|
196 for (j = 0; j < XVECLEN (insn, i); j++)
|
|
197 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
|
|
198 return true;
|
|
199 break;
|
|
200 }
|
|
201 }
|
|
202 return false;
|
|
203 }
|
|
204
|
|
205 /* Return true if there exists exact dependency for store & load, i.e.
|
|
206 the same memory address is used in them. */
|
|
207 static bool
|
|
208 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
|
|
209 {
|
|
210 rtx set1, set2;
|
|
211
|
|
212 set1 = single_set (store);
|
|
213 if (!set1)
|
|
214 return false;
|
|
215 if (!MEM_P (SET_DEST (set1)))
|
|
216 return false;
|
|
217 set2 = single_set (load);
|
|
218 if (!set2)
|
|
219 return false;
|
|
220 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
|
|
221 return true;
|
|
222 return false;
|
|
223 }
|
|
224
|
|
225
|
|
226 /* This function corrects the value of COST (latency) based on the relationship
|
|
227 between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
|
|
228 DW. It should return the new value.
|
|
229
|
|
   On x86 CPUs this is most commonly used to model the fact that values of
|
|
231 registers used to compute address of memory operand needs to be ready
|
|
232 earlier than values of registers used in the actual operation. */
|
|
233
|
|
234 int
|
|
235 ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
|
|
236 unsigned int)
|
|
237 {
|
|
238 enum attr_type insn_type, dep_insn_type;
|
|
239 enum attr_memory memory;
|
|
240 rtx set, set2;
|
|
241 int dep_insn_code_number;
|
|
242
|
|
243 /* Anti and output dependencies have zero cost on all CPUs. */
|
|
244 if (dep_type != 0)
|
|
245 return 0;
|
|
246
|
|
247 dep_insn_code_number = recog_memoized (dep_insn);
|
|
248
|
|
249 /* If we can't recognize the insns, we can't really do anything. */
|
|
250 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
|
|
251 return cost;
|
|
252
|
|
253 insn_type = get_attr_type (insn);
|
|
254 dep_insn_type = get_attr_type (dep_insn);
|
|
255
|
|
256 switch (ix86_tune)
|
|
257 {
|
|
258 case PROCESSOR_PENTIUM:
|
|
259 case PROCESSOR_LAKEMONT:
|
|
260 /* Address Generation Interlock adds a cycle of latency. */
|
|
261 if (insn_type == TYPE_LEA)
|
|
262 {
|
|
263 rtx addr = PATTERN (insn);
|
|
264
|
|
265 if (GET_CODE (addr) == PARALLEL)
|
|
266 addr = XVECEXP (addr, 0, 0);
|
|
267
|
|
268 gcc_assert (GET_CODE (addr) == SET);
|
|
269
|
|
270 addr = SET_SRC (addr);
|
|
271 if (modified_in_p (addr, dep_insn))
|
|
272 cost += 1;
|
|
273 }
|
|
274 else if (ix86_agi_dependent (dep_insn, insn))
|
|
275 cost += 1;
|
|
276
|
|
277 /* ??? Compares pair with jump/setcc. */
|
|
278 if (ix86_flags_dependent (insn, dep_insn, insn_type))
|
|
279 cost = 0;
|
|
280
|
|
281 /* Floating point stores require value to be ready one cycle earlier. */
|
|
282 if (insn_type == TYPE_FMOV
|
|
283 && get_attr_memory (insn) == MEMORY_STORE
|
|
284 && !ix86_agi_dependent (dep_insn, insn))
|
|
285 cost += 1;
|
|
286 break;
|
|
287
|
|
288 case PROCESSOR_PENTIUMPRO:
|
|
289 /* INT->FP conversion is expensive. */
|
|
290 if (get_attr_fp_int_src (dep_insn))
|
|
291 cost += 5;
|
|
292
|
|
293 /* There is one cycle extra latency between an FP op and a store. */
|
|
294 if (insn_type == TYPE_FMOV
|
|
295 && (set = single_set (dep_insn)) != NULL_RTX
|
|
296 && (set2 = single_set (insn)) != NULL_RTX
|
|
297 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
|
|
298 && MEM_P (SET_DEST (set2)))
|
|
299 cost += 1;
|
|
300
|
|
301 memory = get_attr_memory (insn);
|
|
302
|
|
303 /* Show ability of reorder buffer to hide latency of load by executing
|
|
304 in parallel with previous instruction in case
|
|
305 previous instruction is not needed to compute the address. */
|
|
306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
307 && !ix86_agi_dependent (dep_insn, insn))
|
|
308 {
|
|
309 /* Claim moves to take one cycle, as core can issue one load
|
|
310 at time and the next load can start cycle later. */
|
|
311 if (dep_insn_type == TYPE_IMOV
|
|
312 || dep_insn_type == TYPE_FMOV)
|
|
313 cost = 1;
|
|
314 else if (cost > 1)
|
|
315 cost--;
|
|
316 }
|
|
317 break;
|
|
318
|
|
319 case PROCESSOR_K6:
|
|
320 /* The esp dependency is resolved before
|
|
321 the instruction is really finished. */
|
|
322 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
|
|
323 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
|
|
324 return 1;
|
|
325
|
|
326 /* INT->FP conversion is expensive. */
|
|
327 if (get_attr_fp_int_src (dep_insn))
|
|
328 cost += 5;
|
|
329
|
|
330 memory = get_attr_memory (insn);
|
|
331
|
|
332 /* Show ability of reorder buffer to hide latency of load by executing
|
|
333 in parallel with previous instruction in case
|
|
334 previous instruction is not needed to compute the address. */
|
|
335 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
336 && !ix86_agi_dependent (dep_insn, insn))
|
|
337 {
|
|
338 /* Claim moves to take one cycle, as core can issue one load
|
|
339 at time and the next load can start cycle later. */
|
|
340 if (dep_insn_type == TYPE_IMOV
|
|
341 || dep_insn_type == TYPE_FMOV)
|
|
342 cost = 1;
|
|
343 else if (cost > 2)
|
|
344 cost -= 2;
|
|
345 else
|
|
346 cost = 1;
|
|
347 }
|
|
348 break;
|
|
349
|
|
350 case PROCESSOR_AMDFAM10:
|
|
351 case PROCESSOR_BDVER1:
|
|
352 case PROCESSOR_BDVER2:
|
|
353 case PROCESSOR_BDVER3:
|
|
354 case PROCESSOR_BDVER4:
|
|
355 case PROCESSOR_BTVER1:
|
|
356 case PROCESSOR_BTVER2:
|
|
357 case PROCESSOR_GENERIC:
|
|
358 /* Stack engine allows to execute push&pop instructions in parall. */
|
|
359 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
|
|
360 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
|
|
361 return 0;
|
|
362 /* FALLTHRU */
|
|
363
|
|
364 case PROCESSOR_ATHLON:
|
|
365 case PROCESSOR_K8:
|
|
366 memory = get_attr_memory (insn);
|
|
367
|
|
368 /* Show ability of reorder buffer to hide latency of load by executing
|
|
369 in parallel with previous instruction in case
|
|
370 previous instruction is not needed to compute the address. */
|
|
371 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
372 && !ix86_agi_dependent (dep_insn, insn))
|
|
373 {
|
|
374 enum attr_unit unit = get_attr_unit (insn);
|
|
375 int loadcost = 3;
|
|
376
|
|
377 /* Because of the difference between the length of integer and
|
|
378 floating unit pipeline preparation stages, the memory operands
|
|
379 for floating point are cheaper.
|
|
380
|
|
381 ??? For Athlon it the difference is most probably 2. */
|
|
382 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
|
|
383 loadcost = 3;
|
|
384 else
|
|
385 loadcost = TARGET_ATHLON ? 2 : 0;
|
|
386
|
|
387 if (cost >= loadcost)
|
|
388 cost -= loadcost;
|
|
389 else
|
|
390 cost = 0;
|
|
391 }
|
|
392 break;
|
|
393
|
|
394 case PROCESSOR_ZNVER1:
|
|
395 /* Stack engine allows to execute push&pop instructions in parall. */
|
|
396 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
|
|
397 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
|
|
398 return 0;
|
|
399
|
|
400 memory = get_attr_memory (insn);
|
|
401
|
|
402 /* Show ability of reorder buffer to hide latency of load by executing
|
|
403 in parallel with previous instruction in case
|
|
404 previous instruction is not needed to compute the address. */
|
|
405 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
406 && !ix86_agi_dependent (dep_insn, insn))
|
|
407 {
|
|
408 enum attr_unit unit = get_attr_unit (insn);
|
|
409 int loadcost;
|
|
410
|
|
411 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
|
|
412 loadcost = 4;
|
|
413 else
|
|
414 loadcost = 7;
|
|
415
|
|
416 if (cost >= loadcost)
|
|
417 cost -= loadcost;
|
|
418 else
|
|
419 cost = 0;
|
|
420 }
|
|
421 break;
|
|
422
|
|
423 case PROCESSOR_CORE2:
|
|
424 case PROCESSOR_NEHALEM:
|
|
425 case PROCESSOR_SANDYBRIDGE:
|
|
426 case PROCESSOR_HASWELL:
|
|
427 /* Stack engine allows to execute push&pop instructions in parall. */
|
|
428 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
|
|
429 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
|
|
430 return 0;
|
|
431
|
|
432 memory = get_attr_memory (insn);
|
|
433
|
|
434 /* Show ability of reorder buffer to hide latency of load by executing
|
|
435 in parallel with previous instruction in case
|
|
436 previous instruction is not needed to compute the address. */
|
|
437 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
438 && !ix86_agi_dependent (dep_insn, insn))
|
|
439 {
|
|
440 if (cost >= 4)
|
|
441 cost -= 4;
|
|
442 else
|
|
443 cost = 0;
|
|
444 }
|
|
445 break;
|
|
446
|
|
447 case PROCESSOR_SILVERMONT:
|
|
448 case PROCESSOR_KNL:
|
|
449 case PROCESSOR_KNM:
|
|
450 case PROCESSOR_INTEL:
|
|
451 if (!reload_completed)
|
|
452 return cost;
|
|
453
|
|
454 /* Increase cost of integer loads. */
|
|
455 memory = get_attr_memory (dep_insn);
|
|
456 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
|
457 {
|
|
458 enum attr_unit unit = get_attr_unit (dep_insn);
|
|
459 if (unit == UNIT_INTEGER && cost == 1)
|
|
460 {
|
|
461 if (memory == MEMORY_LOAD)
|
|
462 cost = 3;
|
|
463 else
|
|
464 {
|
|
465 /* Increase cost of ld/st for short int types only
|
|
466 because of store forwarding issue. */
|
|
467 rtx set = single_set (dep_insn);
|
|
468 if (set && (GET_MODE (SET_DEST (set)) == QImode
|
|
469 || GET_MODE (SET_DEST (set)) == HImode))
|
|
470 {
|
|
471 /* Increase cost of store/load insn if exact
|
|
472 dependence exists and it is load insn. */
|
|
473 enum attr_memory insn_memory = get_attr_memory (insn);
|
|
474 if (insn_memory == MEMORY_LOAD
|
|
475 && exact_store_load_dependency (dep_insn, insn))
|
|
476 cost = 3;
|
|
477 }
|
|
478 }
|
|
479 }
|
|
480 }
|
|
481
|
|
482 default:
|
|
483 break;
|
|
484 }
|
|
485
|
|
486 return cost;
|
|
487 }
|
|
488
|
|
489 /* How many alternative schedules to try. This should be as wide as the
|
|
490 scheduling freedom in the DFA, but no wider. Making this value too
|
|
   large results in extra work for the scheduler.  */
|
|
492
|
|
493 int
|
|
494 ia32_multipass_dfa_lookahead (void)
|
|
495 {
|
|
496 /* Generally, we want haifa-sched:max_issue() to look ahead as far
|
|
497 as many instructions can be executed on a cycle, i.e.,
|
|
498 issue_rate. */
|
|
499 if (reload_completed)
|
|
500 return ix86_issue_rate ();
|
|
501 /* Don't use lookahead for pre-reload schedule to save compile time. */
|
|
502 return 0;
|
|
503 }
|
|
504
|
|
505 /* Return true if target platform supports macro-fusion. */
|
|
506
|
|
507 bool
|
|
508 ix86_macro_fusion_p ()
|
|
509 {
|
|
510 return TARGET_FUSE_CMP_AND_BRANCH;
|
|
511 }
|
|
512
|
|
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to
   "Intel Architectures Optimization Reference Manual".  */

bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
  rtx src, dest;
  enum rtx_code ccode;
  rtx compare_set = NULL_RTX, test_if, cond;
  rtx alu_set = NULL_RTX, addr = NULL_RTX;

  /* The second insn of the pair must be a conditional jump.  */
  if (!any_condjump_p (condjmp))
    return false;

  /* CONDJMP must read the fixed condition-code register and CONDGEN
     must set it, otherwise there is no producer/consumer pair.  */
  unsigned int condreg1, condreg2;
  rtx cc_reg_1;
  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
  if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
      || !condgen
      || !modified_in_p (cc_reg_1, condgen))
    return false;

  /* Only test, compare, inc/dec and ALU insns are candidates for
     fusing with the jump.  */
  if (get_attr_type (condgen) != TYPE_TEST
      && get_attr_type (condgen) != TYPE_ICMP
      && get_attr_type (condgen) != TYPE_INCDEC
      && get_attr_type (condgen) != TYPE_ALU)
    return false;

  compare_set = single_set (condgen);
  if (compare_set == NULL_RTX
      && !TARGET_FUSE_ALU_AND_BRANCH)
    return false;

  /* CONDGEN is not a single set: scan its PARALLEL for the COMPARE
     set and the accompanying ALU set.  */
  if (compare_set == NULL_RTX)
    {
      int i;
      rtx pat = PATTERN (condgen);
      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
	  {
	    rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
	    if (GET_CODE (set_src) == COMPARE)
	      compare_set = XVECEXP (pat, 0, i);
	    else
	      alu_set = XVECEXP (pat, 0, i);
	  }
    }
  if (compare_set == NULL_RTX)
    return false;
  src = SET_SRC (compare_set);
  if (GET_CODE (src) != COMPARE)
    return false;

  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     supported.  */
  if ((MEM_P (XEXP (src, 0))
       && CONST_INT_P (XEXP (src, 1)))
      || (MEM_P (XEXP (src, 1))
	  && CONST_INT_P (XEXP (src, 0))))
    return false;

  /* No fusion for RIP-relative address.  */
  if (MEM_P (XEXP (src, 0)))
    addr = XEXP (XEXP (src, 0), 0);
  else if (MEM_P (XEXP (src, 1)))
    addr = XEXP (XEXP (src, 1), 0);

  if (addr) {
    ix86_address parts;
    int ok = ix86_decompose_address (addr, &parts);
    gcc_assert (ok);

    if (ix86_rip_relative_addr_p (&parts))
      return false;
  }

  /* Extract the comparison code the jump actually tests.  */
  test_if = SET_SRC (pc_set (condjmp));
  cond = XEXP (test_if, 0);
  ccode = GET_CODE (cond);
  /* Check whether conditional jump use Sign or Overflow Flags.  */
  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
      && (ccode == GE
          || ccode == GT
	  || ccode == LE
	  || ccode == LT))
    return false;

  /* Return true for TYPE_TEST and TYPE_ICMP.  */
  if (get_attr_type (condgen) == TYPE_TEST
      || get_attr_type (condgen) == TYPE_ICMP)
    return true;

  /* The following is the case that macro-fusion for alu + jmp.  */
  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
    return false;

  /* No fusion for alu op with memory destination operand.  */
  dest = SET_DEST (alu_set);
  if (MEM_P (dest))
    return false;

  /* Macro-fusion for inc/dec + unsigned conditional jump is not
     supported.  */
  if (get_attr_type (condgen) == TYPE_INCDEC
      && (ccode == GEU
	  || ccode == GTU
	  || ccode == LEU
	  || ccode == LTU))
    return false;

  return true;
}
|
|
627
|