CbC_gcc: comparison of gcc/config/sparc/sparc.c @ 0:a06113de4d67

first commit

author   | kent <kent@cr.ie.u-ryukyu.ac.jp>
date     | Fri, 17 Jul 2009 14:47:48 +0900
parents  |
children | 3bfb6c00c1e0
revisions compared: -1:000000000000 -> 0:a06113de4d67

1 /* Subroutines for insn-output.c for SPARC. | |
2 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, | |
3 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 | |
4 Free Software Foundation, Inc. | |
5 Contributed by Michael Tiemann (tiemann@cygnus.com) | |
6 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, | |
7 at Cygnus Support. | |
8 | |
9 This file is part of GCC. | |
10 | |
11 GCC is free software; you can redistribute it and/or modify | |
12 it under the terms of the GNU General Public License as published by | |
13 the Free Software Foundation; either version 3, or (at your option) | |
14 any later version. | |
15 | |
16 GCC is distributed in the hope that it will be useful, | |
17 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 GNU General Public License for more details. | |
20 | |
21 You should have received a copy of the GNU General Public License | |
22 along with GCC; see the file COPYING3. If not see | |
23 <http://www.gnu.org/licenses/>. */ | |
24 | |
25 #include "config.h" | |
26 #include "system.h" | |
27 #include "coretypes.h" | |
28 #include "tm.h" | |
29 #include "tree.h" | |
30 #include "rtl.h" | |
31 #include "regs.h" | |
32 #include "hard-reg-set.h" | |
33 #include "real.h" | |
34 #include "insn-config.h" | |
35 #include "insn-codes.h" | |
36 #include "conditions.h" | |
37 #include "output.h" | |
38 #include "insn-attr.h" | |
39 #include "flags.h" | |
40 #include "function.h" | |
41 #include "expr.h" | |
42 #include "optabs.h" | |
43 #include "recog.h" | |
44 #include "toplev.h" | |
45 #include "ggc.h" | |
46 #include "tm_p.h" | |
47 #include "debug.h" | |
48 #include "target.h" | |
49 #include "target-def.h" | |
50 #include "cfglayout.h" | |
51 #include "gimple.h" | |
52 #include "langhooks.h" | |
53 #include "params.h" | |
54 #include "df.h" | |
55 | |
56 /* Processor costs */ | |
57 static const | |
58 struct processor_costs cypress_costs = { | |
59 COSTS_N_INSNS (2), /* int load */ | |
60 COSTS_N_INSNS (2), /* int signed load */ | |
61 COSTS_N_INSNS (2), /* int zeroed load */ | |
62 COSTS_N_INSNS (2), /* float load */ | |
63 COSTS_N_INSNS (5), /* fmov, fneg, fabs */ | |
64 COSTS_N_INSNS (5), /* fadd, fsub */ | |
65 COSTS_N_INSNS (1), /* fcmp */ | |
66 COSTS_N_INSNS (1), /* fmov, fmovr */ | |
67 COSTS_N_INSNS (7), /* fmul */ | |
68 COSTS_N_INSNS (37), /* fdivs */ | |
69 COSTS_N_INSNS (37), /* fdivd */ | |
70 COSTS_N_INSNS (63), /* fsqrts */ | |
71 COSTS_N_INSNS (63), /* fsqrtd */ | |
72 COSTS_N_INSNS (1), /* imul */ | |
73 COSTS_N_INSNS (1), /* imulX */ | |
74 0, /* imul bit factor */ | |
75 COSTS_N_INSNS (1), /* idiv */ | |
76 COSTS_N_INSNS (1), /* idivX */ | |
77 COSTS_N_INSNS (1), /* movcc/movr */ | |
78 0, /* shift penalty */ | |
79 }; | |
80 | |
81 static const | |
82 struct processor_costs supersparc_costs = { | |
83 COSTS_N_INSNS (1), /* int load */ | |
84 COSTS_N_INSNS (1), /* int signed load */ | |
85 COSTS_N_INSNS (1), /* int zeroed load */ | |
86 COSTS_N_INSNS (0), /* float load */ | |
87 COSTS_N_INSNS (3), /* fmov, fneg, fabs */ | |
88 COSTS_N_INSNS (3), /* fadd, fsub */ | |
89 COSTS_N_INSNS (3), /* fcmp */ | |
90 COSTS_N_INSNS (1), /* fmov, fmovr */ | |
91 COSTS_N_INSNS (3), /* fmul */ | |
92 COSTS_N_INSNS (6), /* fdivs */ | |
93 COSTS_N_INSNS (9), /* fdivd */ | |
94 COSTS_N_INSNS (12), /* fsqrts */ | |
95 COSTS_N_INSNS (12), /* fsqrtd */ | |
96 COSTS_N_INSNS (4), /* imul */ | |
97 COSTS_N_INSNS (4), /* imulX */ | |
98 0, /* imul bit factor */ | |
99 COSTS_N_INSNS (4), /* idiv */ | |
100 COSTS_N_INSNS (4), /* idivX */ | |
101 COSTS_N_INSNS (1), /* movcc/movr */ | |
102 1, /* shift penalty */ | |
103 }; | |
104 | |
105 static const | |
106 struct processor_costs hypersparc_costs = { | |
107 COSTS_N_INSNS (1), /* int load */ | |
108 COSTS_N_INSNS (1), /* int signed load */ | |
109 COSTS_N_INSNS (1), /* int zeroed load */ | |
110 COSTS_N_INSNS (1), /* float load */ | |
111 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ | |
112 COSTS_N_INSNS (1), /* fadd, fsub */ | |
113 COSTS_N_INSNS (1), /* fcmp */ | |
114 COSTS_N_INSNS (1), /* fmov, fmovr */ | |
115 COSTS_N_INSNS (1), /* fmul */ | |
116 COSTS_N_INSNS (8), /* fdivs */ | |
117 COSTS_N_INSNS (12), /* fdivd */ | |
118 COSTS_N_INSNS (17), /* fsqrts */ | |
119 COSTS_N_INSNS (17), /* fsqrtd */ | |
120 COSTS_N_INSNS (17), /* imul */ | |
121 COSTS_N_INSNS (17), /* imulX */ | |
122 0, /* imul bit factor */ | |
123 COSTS_N_INSNS (17), /* idiv */ | |
124 COSTS_N_INSNS (17), /* idivX */ | |
125 COSTS_N_INSNS (1), /* movcc/movr */ | |
126 0, /* shift penalty */ | |
127 }; | |
128 | |
129 static const | |
130 struct processor_costs sparclet_costs = { | |
131 COSTS_N_INSNS (3), /* int load */ | |
132 COSTS_N_INSNS (3), /* int signed load */ | |
133 COSTS_N_INSNS (1), /* int zeroed load */ | |
134 COSTS_N_INSNS (1), /* float load */ | |
135 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ | |
136 COSTS_N_INSNS (1), /* fadd, fsub */ | |
137 COSTS_N_INSNS (1), /* fcmp */ | |
138 COSTS_N_INSNS (1), /* fmov, fmovr */ | |
139 COSTS_N_INSNS (1), /* fmul */ | |
140 COSTS_N_INSNS (1), /* fdivs */ | |
141 COSTS_N_INSNS (1), /* fdivd */ | |
142 COSTS_N_INSNS (1), /* fsqrts */ | |
143 COSTS_N_INSNS (1), /* fsqrtd */ | |
144 COSTS_N_INSNS (5), /* imul */ | |
145 COSTS_N_INSNS (5), /* imulX */ | |
146 0, /* imul bit factor */ | |
147 COSTS_N_INSNS (5), /* idiv */ | |
148 COSTS_N_INSNS (5), /* idivX */ | |
149 COSTS_N_INSNS (1), /* movcc/movr */ | |
150 0, /* shift penalty */ | |
151 }; | |
152 | |
153 static const | |
154 struct processor_costs ultrasparc_costs = { | |
155 COSTS_N_INSNS (2), /* int load */ | |
156 COSTS_N_INSNS (3), /* int signed load */ | |
157 COSTS_N_INSNS (2), /* int zeroed load */ | |
158 COSTS_N_INSNS (2), /* float load */ | |
159 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ | |
160 COSTS_N_INSNS (4), /* fadd, fsub */ | |
161 COSTS_N_INSNS (1), /* fcmp */ | |
162 COSTS_N_INSNS (2), /* fmov, fmovr */ | |
163 COSTS_N_INSNS (4), /* fmul */ | |
164 COSTS_N_INSNS (13), /* fdivs */ | |
165 COSTS_N_INSNS (23), /* fdivd */ | |
166 COSTS_N_INSNS (13), /* fsqrts */ | |
167 COSTS_N_INSNS (23), /* fsqrtd */ | |
168 COSTS_N_INSNS (4), /* imul */ | |
169 COSTS_N_INSNS (4), /* imulX */ | |
170 2, /* imul bit factor */ | |
171 COSTS_N_INSNS (37), /* idiv */ | |
172 COSTS_N_INSNS (68), /* idivX */ | |
173 COSTS_N_INSNS (2), /* movcc/movr */ | |
174 2, /* shift penalty */ | |
175 }; | |
176 | |
177 static const | |
178 struct processor_costs ultrasparc3_costs = { | |
179 COSTS_N_INSNS (2), /* int load */ | |
180 COSTS_N_INSNS (3), /* int signed load */ | |
181 COSTS_N_INSNS (3), /* int zeroed load */ | |
182 COSTS_N_INSNS (2), /* float load */ | |
183 COSTS_N_INSNS (3), /* fmov, fneg, fabs */ | |
184 COSTS_N_INSNS (4), /* fadd, fsub */ | |
185 COSTS_N_INSNS (5), /* fcmp */ | |
186 COSTS_N_INSNS (3), /* fmov, fmovr */ | |
187 COSTS_N_INSNS (4), /* fmul */ | |
188 COSTS_N_INSNS (17), /* fdivs */ | |
189 COSTS_N_INSNS (20), /* fdivd */ | |
190 COSTS_N_INSNS (20), /* fsqrts */ | |
191 COSTS_N_INSNS (29), /* fsqrtd */ | |
192 COSTS_N_INSNS (6), /* imul */ | |
193 COSTS_N_INSNS (6), /* imulX */ | |
194 0, /* imul bit factor */ | |
195 COSTS_N_INSNS (40), /* idiv */ | |
196 COSTS_N_INSNS (71), /* idivX */ | |
197 COSTS_N_INSNS (2), /* movcc/movr */ | |
198 0, /* shift penalty */ | |
199 }; | |
200 | |
201 static const | |
202 struct processor_costs niagara_costs = { | |
203 COSTS_N_INSNS (3), /* int load */ | |
204 COSTS_N_INSNS (3), /* int signed load */ | |
205 COSTS_N_INSNS (3), /* int zeroed load */ | |
206 COSTS_N_INSNS (9), /* float load */ | |
207 COSTS_N_INSNS (8), /* fmov, fneg, fabs */ | |
208 COSTS_N_INSNS (8), /* fadd, fsub */ | |
209 COSTS_N_INSNS (26), /* fcmp */ | |
210 COSTS_N_INSNS (8), /* fmov, fmovr */ | |
211 COSTS_N_INSNS (29), /* fmul */ | |
212 COSTS_N_INSNS (54), /* fdivs */ | |
213 COSTS_N_INSNS (83), /* fdivd */ | |
214 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */ | |
215 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */ | |
216 COSTS_N_INSNS (11), /* imul */ | |
217 COSTS_N_INSNS (11), /* imulX */ | |
218 0, /* imul bit factor */ | |
219 COSTS_N_INSNS (72), /* idiv */ | |
220 COSTS_N_INSNS (72), /* idivX */ | |
221 COSTS_N_INSNS (1), /* movcc/movr */ | |
222 0, /* shift penalty */ | |
223 }; | |
224 | |
225 static const | |
226 struct processor_costs niagara2_costs = { | |
227 COSTS_N_INSNS (3), /* int load */ | |
228 COSTS_N_INSNS (3), /* int signed load */ | |
229 COSTS_N_INSNS (3), /* int zeroed load */ | |
230 COSTS_N_INSNS (3), /* float load */ | |
231 COSTS_N_INSNS (6), /* fmov, fneg, fabs */ | |
232 COSTS_N_INSNS (6), /* fadd, fsub */ | |
233 COSTS_N_INSNS (6), /* fcmp */ | |
234 COSTS_N_INSNS (6), /* fmov, fmovr */ | |
235 COSTS_N_INSNS (6), /* fmul */ | |
236 COSTS_N_INSNS (19), /* fdivs */ | |
237 COSTS_N_INSNS (33), /* fdivd */ | |
238 COSTS_N_INSNS (19), /* fsqrts */ | |
239 COSTS_N_INSNS (33), /* fsqrtd */ | |
240 COSTS_N_INSNS (5), /* imul */ | |
241 COSTS_N_INSNS (5), /* imulX */ | |
242 0, /* imul bit factor */ | |
243 COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */ | |
244 COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */ | |
245 COSTS_N_INSNS (1), /* movcc/movr */ | |
246 0, /* shift penalty */ | |
247 }; | |
248 | |
249 const struct processor_costs *sparc_costs = &cypress_costs; | |
250 | |
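/* Illustrative note, not part of the original file: COSTS_N_INSNS (N) is
   GCC's generic cost unit (roughly N * 4), so the tables above express
   latencies in units of one fast integer instruction.  A consumer reads a
   field through the currently selected table; the field name below is
   assumed from the struct processor_costs declaration in sparc.h.  */
static int
example_fdivs_cost_units (void)
{
  /* With sparc_costs == &ultrasparc_costs this is COSTS_N_INSNS (13).  */
  return sparc_costs->float_div_sf;
}
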
251 #ifdef HAVE_AS_RELAX_OPTION | |
252 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use | |
253 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized. | |
254 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether | |
255 somebody branches between the sethi and jmp. */ | |
256 #define LEAF_SIBCALL_SLOT_RESERVED_P 1 | |
257 #else | |
258 #define LEAF_SIBCALL_SLOT_RESERVED_P \ | |
259 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic) | |
260 #endif | |
261 | |
262 /* Global variables for machine-dependent things. */ | |
263 | |
264 /* Size of frame. Need to know this to emit return insns from leaf procedures. | |
265 ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the | |
266 reload pass. This is important as the value is later used for scheduling | |
267 (to see what can go in a delay slot). | |
268 APPARENT_FSIZE is the size of the stack less the register save area and less | |
269 the outgoing argument area. It is used when saving call preserved regs. */ | |
270 static HOST_WIDE_INT apparent_fsize; | |
271 static HOST_WIDE_INT actual_fsize; | |
272 | |
273 /* Number of live general or floating point registers needed to be | |
274 saved (as 4-byte quantities). */ | |
275 static int num_gfregs; | |
276 | |
277 /* The alias set for prologue/epilogue register save/restore. */ | |
278 static GTY(()) alias_set_type sparc_sr_alias_set; | |
279 | |
280 /* The alias set for the structure return value. */ | |
281 static GTY(()) alias_set_type struct_value_alias_set; | |
282 | |
283 /* Save the operands last given to a compare for use when we | |
284 generate a scc or bcc insn. */ | |
285 rtx sparc_compare_op0, sparc_compare_op1, sparc_compare_emitted; | |
286 | |
287 /* Vector to say how input registers are mapped to output registers. | |
288 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to | |
289 eliminate it. You must use -fomit-frame-pointer to get that. */ | |
290 char leaf_reg_remap[] = | |
291 { 0, 1, 2, 3, 4, 5, 6, 7, | |
292 -1, -1, -1, -1, -1, -1, 14, -1, | |
293 -1, -1, -1, -1, -1, -1, -1, -1, | |
294 8, 9, 10, 11, 12, 13, -1, 15, | |
295 | |
296 32, 33, 34, 35, 36, 37, 38, 39, | |
297 40, 41, 42, 43, 44, 45, 46, 47, | |
298 48, 49, 50, 51, 52, 53, 54, 55, | |
299 56, 57, 58, 59, 60, 61, 62, 63, | |
300 64, 65, 66, 67, 68, 69, 70, 71, | |
301 72, 73, 74, 75, 76, 77, 78, 79, | |
302 80, 81, 82, 83, 84, 85, 86, 87, | |
303 88, 89, 90, 91, 92, 93, 94, 95, | |
304 96, 97, 98, 99, 100}; | |
305 | |
306 /* Vector, indexed by hard register number, which contains 1 | |
307 for a register that is allowable in a candidate for leaf | |
308 function treatment. */ | |
309 char sparc_leaf_regs[] = | |
310 { 1, 1, 1, 1, 1, 1, 1, 1, | |
311 0, 0, 0, 0, 0, 0, 1, 0, | |
312 0, 0, 0, 0, 0, 0, 0, 0, | |
313 1, 1, 1, 1, 1, 1, 0, 1, | |
314 1, 1, 1, 1, 1, 1, 1, 1, | |
315 1, 1, 1, 1, 1, 1, 1, 1, | |
316 1, 1, 1, 1, 1, 1, 1, 1, | |
317 1, 1, 1, 1, 1, 1, 1, 1, | |
318 1, 1, 1, 1, 1, 1, 1, 1, | |
319 1, 1, 1, 1, 1, 1, 1, 1, | |
320 1, 1, 1, 1, 1, 1, 1, 1, | |
321 1, 1, 1, 1, 1, 1, 1, 1, | |
322 1, 1, 1, 1, 1}; | |
323 | |
324 struct machine_function GTY(()) | |
325 { | |
326 /* Some local-dynamic TLS symbol name. */ | |
327 const char *some_ld_name; | |
328 | |
329 /* True if the current function is leaf and uses only leaf regs, | |
330 so that the SPARC leaf function optimization can be applied. | |
331 Private version of current_function_uses_only_leaf_regs, see | |
332 sparc_expand_prologue for the rationale. */ | |
333 int leaf_function_p; | |
334 | |
335 /* True if the data calculated by sparc_expand_prologue are valid. */ | |
336 bool prologue_data_valid_p; | |
337 }; | |
338 | |
339 #define sparc_leaf_function_p cfun->machine->leaf_function_p | |
340 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p | |
341 | |
342 /* Register we pretend to think the frame pointer is allocated to. | |
343 Normally, this is %fp, but if we are in a leaf procedure, this | |
344 is %sp+"something". We record "something" separately as it may | |
345 be too big for reg+constant addressing. */ | |
346 static rtx frame_base_reg; | |
347 static HOST_WIDE_INT frame_base_offset; | |
348 | |
349 /* 1 if the next opcode is to be specially indented. */ | |
350 int sparc_indent_opcode = 0; | |
351 | |
352 static bool sparc_handle_option (size_t, const char *, int); | |
353 static void sparc_init_modes (void); | |
354 static void scan_record_type (tree, int *, int *, int *); | |
355 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode, | |
356 tree, int, int, int *, int *); | |
357 | |
358 static int supersparc_adjust_cost (rtx, rtx, rtx, int); | |
359 static int hypersparc_adjust_cost (rtx, rtx, rtx, int); | |
360 | |
361 static void sparc_output_addr_vec (rtx); | |
362 static void sparc_output_addr_diff_vec (rtx); | |
363 static void sparc_output_deferred_case_vectors (void); | |
364 static rtx sparc_builtin_saveregs (void); | |
365 static int epilogue_renumber (rtx *, int); | |
366 static bool sparc_assemble_integer (rtx, unsigned int, int); | |
367 static int set_extends (rtx); | |
368 static void emit_pic_helper (void); | |
369 static void load_pic_register (bool); | |
370 static int save_or_restore_regs (int, int, rtx, int, int); | |
371 static void emit_save_or_restore_regs (int); | |
372 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT); | |
373 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT); | |
374 #ifdef OBJECT_FORMAT_ELF | |
375 static void sparc_elf_asm_named_section (const char *, unsigned int, tree); | |
376 #endif | |
377 | |
378 static int sparc_adjust_cost (rtx, rtx, rtx, int); | |
379 static int sparc_issue_rate (void); | |
380 static void sparc_sched_init (FILE *, int, int); | |
381 static int sparc_use_sched_lookahead (void); | |
382 | |
383 static void emit_soft_tfmode_libcall (const char *, int, rtx *); | |
384 static void emit_soft_tfmode_binop (enum rtx_code, rtx *); | |
385 static void emit_soft_tfmode_unop (enum rtx_code, rtx *); | |
386 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *); | |
387 static void emit_hard_tfmode_operation (enum rtx_code, rtx *); | |
388 | |
389 static bool sparc_function_ok_for_sibcall (tree, tree); | |
390 static void sparc_init_libfuncs (void); | |
391 static void sparc_init_builtins (void); | |
392 static void sparc_vis_init_builtins (void); | |
393 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int); | |
394 static tree sparc_fold_builtin (tree, tree, bool); | |
395 static int sparc_vis_mul8x16 (int, int); | |
396 static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree); | |
397 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, | |
398 HOST_WIDE_INT, tree); | |
399 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT, | |
400 HOST_WIDE_INT, const_tree); | |
401 static struct machine_function * sparc_init_machine_status (void); | |
402 static bool sparc_cannot_force_const_mem (rtx); | |
403 static rtx sparc_tls_get_addr (void); | |
404 static rtx sparc_tls_got (void); | |
405 static const char *get_some_local_dynamic_name (void); | |
406 static int get_some_local_dynamic_name_1 (rtx *, void *); | |
407 static bool sparc_rtx_costs (rtx, int, int, int *, bool); | |
408 static bool sparc_promote_prototypes (const_tree); | |
409 static rtx sparc_struct_value_rtx (tree, int); | |
410 static bool sparc_return_in_memory (const_tree, const_tree); | |
411 static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *); | |
412 static void sparc_va_start (tree, rtx); | |
413 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); | |
414 static bool sparc_vector_mode_supported_p (enum machine_mode); | |
415 static bool sparc_pass_by_reference (CUMULATIVE_ARGS *, | |
416 enum machine_mode, const_tree, bool); | |
417 static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *, | |
418 enum machine_mode, tree, bool); | |
419 static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int); | |
420 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; | |
421 static void sparc_file_end (void); | |
422 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING | |
423 static const char *sparc_mangle_type (const_tree); | |
424 #endif | |
425 #ifdef SUBTARGET_ATTRIBUTE_TABLE | |
426 const struct attribute_spec sparc_attribute_table[]; | |
427 #endif | |
428 | |
429 /* Option handling. */ | |
430 | |
431 /* Parsed value. */ | |
432 enum cmodel sparc_cmodel; | |
433 | |
434 char sparc_hard_reg_printed[8]; | |
435 | |
436 struct sparc_cpu_select sparc_select[] = | |
437 { | |
438 /* switch name, tune arch */ | |
439 { (char *)0, "default", 1, 1 }, | |
440 { (char *)0, "-mcpu=", 1, 1 }, | |
441 { (char *)0, "-mtune=", 1, 0 }, | |
442 { 0, 0, 0, 0 } | |
443 }; | |
444 | |
445 /* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */ | |
446 enum processor_type sparc_cpu; | |
447 | |
448 /* Whether an FPU option was specified. */ | |
449 static bool fpu_option_set = false; | |
450 | |
451 /* Initialize the GCC target structure. */ | |
452 | |
453 /* The sparc default is to use .half rather than .short for aligned | |
454 HI objects. Use .word instead of .long on non-ELF systems. */ | |
455 #undef TARGET_ASM_ALIGNED_HI_OP | |
456 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" | |
457 #ifndef OBJECT_FORMAT_ELF | |
458 #undef TARGET_ASM_ALIGNED_SI_OP | |
459 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" | |
460 #endif | |
461 | |
462 #undef TARGET_ASM_UNALIGNED_HI_OP | |
463 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t" | |
464 #undef TARGET_ASM_UNALIGNED_SI_OP | |
465 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t" | |
466 #undef TARGET_ASM_UNALIGNED_DI_OP | |
467 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t" | |
468 | |
469 /* The target hook has to handle DI-mode values. */ | |
470 #undef TARGET_ASM_INTEGER | |
471 #define TARGET_ASM_INTEGER sparc_assemble_integer | |
472 | |
473 #undef TARGET_ASM_FUNCTION_PROLOGUE | |
474 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue | |
475 #undef TARGET_ASM_FUNCTION_EPILOGUE | |
476 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue | |
477 | |
478 #undef TARGET_SCHED_ADJUST_COST | |
479 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost | |
480 #undef TARGET_SCHED_ISSUE_RATE | |
481 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate | |
482 #undef TARGET_SCHED_INIT | |
483 #define TARGET_SCHED_INIT sparc_sched_init | |
484 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD | |
485 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead | |
486 | |
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL | |
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall | |
489 | |
490 #undef TARGET_INIT_LIBFUNCS | |
491 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs | |
492 #undef TARGET_INIT_BUILTINS | |
493 #define TARGET_INIT_BUILTINS sparc_init_builtins | |
494 | |
495 #undef TARGET_EXPAND_BUILTIN | |
496 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin | |
497 #undef TARGET_FOLD_BUILTIN | |
498 #define TARGET_FOLD_BUILTIN sparc_fold_builtin | |
499 | |
500 #if TARGET_TLS | |
501 #undef TARGET_HAVE_TLS | |
502 #define TARGET_HAVE_TLS true | |
503 #endif | |
504 | |
505 #undef TARGET_CANNOT_FORCE_CONST_MEM | |
506 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem | |
507 | |
508 #undef TARGET_ASM_OUTPUT_MI_THUNK | |
509 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk | |
510 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK | |
511 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk | |
512 | |
513 #undef TARGET_RTX_COSTS | |
514 #define TARGET_RTX_COSTS sparc_rtx_costs | |
515 #undef TARGET_ADDRESS_COST | |
516 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0 | |
517 | |
518 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a | |
519 no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime | |
520 test for this value. */ | |
521 #undef TARGET_PROMOTE_FUNCTION_ARGS | |
522 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true | |
523 | |
524 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a | |
525 no-op for TARGET_ARCH32 this is ok. Otherwise we'd need to add a runtime | |
526 test for this value. */ | |
527 #undef TARGET_PROMOTE_FUNCTION_RETURN | |
528 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true | |
529 | |
530 #undef TARGET_PROMOTE_PROTOTYPES | |
531 #define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes | |
532 | |
533 #undef TARGET_STRUCT_VALUE_RTX | |
534 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx | |
535 #undef TARGET_RETURN_IN_MEMORY | |
536 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory | |
537 #undef TARGET_MUST_PASS_IN_STACK | |
538 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size | |
539 #undef TARGET_PASS_BY_REFERENCE | |
540 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference | |
541 #undef TARGET_ARG_PARTIAL_BYTES | |
542 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes | |
543 | |
544 #undef TARGET_EXPAND_BUILTIN_SAVEREGS | |
545 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs | |
546 #undef TARGET_STRICT_ARGUMENT_NAMING | |
547 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming | |
548 | |
549 #undef TARGET_EXPAND_BUILTIN_VA_START | |
550 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start | |
551 #undef TARGET_GIMPLIFY_VA_ARG_EXPR | |
552 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg | |
553 | |
554 #undef TARGET_VECTOR_MODE_SUPPORTED_P | |
555 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p | |
556 | |
557 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC | |
558 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec | |
559 | |
560 #ifdef SUBTARGET_INSERT_ATTRIBUTES | |
561 #undef TARGET_INSERT_ATTRIBUTES | |
562 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES | |
563 #endif | |
564 | |
565 #ifdef SUBTARGET_ATTRIBUTE_TABLE | |
566 #undef TARGET_ATTRIBUTE_TABLE | |
567 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table | |
568 #endif | |
569 | |
570 #undef TARGET_RELAXED_ORDERING | |
571 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING | |
572 | |
573 #undef TARGET_DEFAULT_TARGET_FLAGS | |
574 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT | |
575 #undef TARGET_HANDLE_OPTION | |
576 #define TARGET_HANDLE_OPTION sparc_handle_option | |
577 | |
578 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL) | |
579 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL | |
580 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel | |
581 #endif | |
582 | |
583 #undef TARGET_ASM_FILE_END | |
584 #define TARGET_ASM_FILE_END sparc_file_end | |
585 | |
586 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING | |
587 #undef TARGET_MANGLE_TYPE | |
588 #define TARGET_MANGLE_TYPE sparc_mangle_type | |
589 #endif | |
590 | |
591 struct gcc_target targetm = TARGET_INITIALIZER; | |
592 | |
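/* Illustrative sketch, not part of the original file: the #undef/#define
   pairs above override fields of TARGET_INITIALIZER, so once targetm is
   defined the middle end reaches the SPARC callbacks through the hook
   structure rather than by name, e.g.: */
static int
example_issue_rate_via_hook (void)
{
  /* Equivalent to calling sparc_issue_rate once targetm is initialized.  */
  return targetm.sched.issue_rate ();
}
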
593 /* Implement TARGET_HANDLE_OPTION. */ | |
594 | |
595 static bool | |
596 sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) | |
597 { | |
598 switch (code) | |
599 { | |
600 case OPT_mfpu: | |
601 case OPT_mhard_float: | |
602 case OPT_msoft_float: | |
603 fpu_option_set = true; | |
604 break; | |
605 | |
606 case OPT_mcpu_: | |
607 sparc_select[1].string = arg; | |
608 break; | |
609 | |
610 case OPT_mtune_: | |
611 sparc_select[2].string = arg; | |
612 break; | |
613 } | |
614 | |
615 return true; | |
616 } | |
617 | |
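/* Illustrative worked example, not part of the original file: for a command
   line such as "-mcpu=ultrasparc -mtune=niagara" this handler runs twice and
   leaves
     sparc_select[1].string == "ultrasparc"   (selects both arch and tune)
     sparc_select[2].string == "niagara"      (selects tune only)
   sparc_override_options below then looks these strings up in cpu_table.  */
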
618 /* Validate and override various options, and do some machine dependent | |
619 initialization. */ | |
620 | |
621 void | |
622 sparc_override_options (void) | |
623 { | |
624 static struct code_model { | |
625 const char *const name; | |
626 const int value; | |
627 } const cmodels[] = { | |
628 { "32", CM_32 }, | |
629 { "medlow", CM_MEDLOW }, | |
630 { "medmid", CM_MEDMID }, | |
631 { "medany", CM_MEDANY }, | |
632 { "embmedany", CM_EMBMEDANY }, | |
633 { 0, 0 } | |
634 }; | |
635 const struct code_model *cmodel; | |
636 /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */ | |
637 static struct cpu_default { | |
638 const int cpu; | |
639 const char *const name; | |
640 } const cpu_default[] = { | |
641 /* There must be one entry here for each TARGET_CPU value. */ | |
642 { TARGET_CPU_sparc, "cypress" }, | |
643 { TARGET_CPU_sparclet, "tsc701" }, | |
644 { TARGET_CPU_sparclite, "f930" }, | |
645 { TARGET_CPU_v8, "v8" }, | |
646 { TARGET_CPU_hypersparc, "hypersparc" }, | |
647 { TARGET_CPU_sparclite86x, "sparclite86x" }, | |
648 { TARGET_CPU_supersparc, "supersparc" }, | |
649 { TARGET_CPU_v9, "v9" }, | |
650 { TARGET_CPU_ultrasparc, "ultrasparc" }, | |
651 { TARGET_CPU_ultrasparc3, "ultrasparc3" }, | |
652 { TARGET_CPU_niagara, "niagara" }, | |
653 { TARGET_CPU_niagara2, "niagara2" }, | |
654 { 0, 0 } | |
655 }; | |
656 const struct cpu_default *def; | |
657 /* Table of values for -m{cpu,tune}=. */ | |
658 static struct cpu_table { | |
659 const char *const name; | |
660 const enum processor_type processor; | |
661 const int disable; | |
662 const int enable; | |
663 } const cpu_table[] = { | |
664 { "v7", PROCESSOR_V7, MASK_ISA, 0 }, | |
665 { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 }, | |
666 { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 }, | |
667 /* TI TMS390Z55 supersparc */ | |
668 { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 }, | |
669 { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE }, | |
670 /* The Fujitsu MB86930 is the original sparclite chip, with no fpu. | |
671 The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */ | |
672 { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE }, | |
673 { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU }, | |
674 { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU }, | |
675 { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, | |
676 MASK_SPARCLITE }, | |
677 { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET }, | |
678 /* TEMIC sparclet */ | |
679 { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET }, | |
680 { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 }, | |
681 /* TI ultrasparc I, II, IIi */ | |
682 { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 | |
683 /* Although insns using %y are deprecated, it is a clear win on current | |
684 ultrasparcs. */ | |
685 |MASK_DEPRECATED_V8_INSNS}, | |
686 /* TI ultrasparc III */ | |
687 /* ??? Check if %y issue still holds true in ultra3. */ | |
688 { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS}, | |
689 /* UltraSPARC T1 */ | |
690 { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS}, | |
691 { "niagara2", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9}, | |
692 { 0, 0, 0, 0 } | |
693 }; | |
694 const struct cpu_table *cpu; | |
695 const struct sparc_cpu_select *sel; | |
696 int fpu; | |
697 | |
698 #ifndef SPARC_BI_ARCH | |
699 /* Check for unsupported architecture size. */ | |
700 if (! TARGET_64BIT != DEFAULT_ARCH32_P) | |
701 error ("%s is not supported by this configuration", | |
702 DEFAULT_ARCH32_P ? "-m64" : "-m32"); | |
703 #endif | |
704 | |
705 /* We force all 64bit archs to use 128 bit long double */ | |
706 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128) | |
707 { | |
708 error ("-mlong-double-64 not allowed with -m64"); | |
709 target_flags |= MASK_LONG_DOUBLE_128; | |
710 } | |
711 | |
712 /* Code model selection. */ | |
713 sparc_cmodel = SPARC_DEFAULT_CMODEL; | |
714 | |
715 #ifdef SPARC_BI_ARCH | |
716 if (TARGET_ARCH32) | |
717 sparc_cmodel = CM_32; | |
718 #endif | |
719 | |
720 if (sparc_cmodel_string != NULL) | |
721 { | |
722 if (TARGET_ARCH64) | |
723 { | |
724 for (cmodel = &cmodels[0]; cmodel->name; cmodel++) | |
725 if (strcmp (sparc_cmodel_string, cmodel->name) == 0) | |
726 break; | |
727 if (cmodel->name == NULL) | |
728 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string); | |
729 else | |
730 sparc_cmodel = cmodel->value; | |
731 } | |
732 else | |
733 error ("-mcmodel= is not supported on 32 bit systems"); | |
734 } | |
735 | |
736 fpu = target_flags & MASK_FPU; /* save current -mfpu status */ | |
737 | |
738 /* Set the default CPU. */ | |
739 for (def = &cpu_default[0]; def->name; ++def) | |
740 if (def->cpu == TARGET_CPU_DEFAULT) | |
741 break; | |
742 gcc_assert (def->name); | |
743 sparc_select[0].string = def->name; | |
744 | |
745 for (sel = &sparc_select[0]; sel->name; ++sel) | |
746 { | |
747 if (sel->string) | |
748 { | |
749 for (cpu = &cpu_table[0]; cpu->name; ++cpu) | |
750 if (! strcmp (sel->string, cpu->name)) | |
751 { | |
752 if (sel->set_tune_p) | |
753 sparc_cpu = cpu->processor; | |
754 | |
755 if (sel->set_arch_p) | |
756 { | |
757 target_flags &= ~cpu->disable; | |
758 target_flags |= cpu->enable; | |
759 } | |
760 break; | |
761 } | |
762 | |
763 if (! cpu->name) | |
764 error ("bad value (%s) for %s switch", sel->string, sel->name); | |
765 } | |
766 } | |
767 | |
768 /* If -mfpu or -mno-fpu was explicitly used, don't override with | |
769 the processor default. */ | |
770 if (fpu_option_set) | |
771 target_flags = (target_flags & ~MASK_FPU) | fpu; | |
772 | |
773 /* Don't allow -mvis if FPU is disabled. */ | |
774 if (! TARGET_FPU) | |
775 target_flags &= ~MASK_VIS; | |
776 | |
777 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions | |
778 are available. | |
779 -m64 also implies v9. */ | |
780 if (TARGET_VIS || TARGET_ARCH64) | |
781 { | |
782 target_flags |= MASK_V9; | |
783 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); | |
784 } | |
785 | |
786 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */ | |
787 if (TARGET_V9 && TARGET_ARCH32) | |
788 target_flags |= MASK_DEPRECATED_V8_INSNS; | |
789 | |
790 /* V8PLUS requires V9, makes no sense in 64 bit mode. */ | |
791 if (! TARGET_V9 || TARGET_ARCH64) | |
792 target_flags &= ~MASK_V8PLUS; | |
793 | |
794 /* Don't use stack biasing in 32 bit mode. */ | |
795 if (TARGET_ARCH32) | |
796 target_flags &= ~MASK_STACK_BIAS; | |
797 | |
798 /* Supply a default value for align_functions. */ | |
799 if (align_functions == 0 | |
800 && (sparc_cpu == PROCESSOR_ULTRASPARC | |
801 || sparc_cpu == PROCESSOR_ULTRASPARC3 | |
802 || sparc_cpu == PROCESSOR_NIAGARA | |
803 || sparc_cpu == PROCESSOR_NIAGARA2)) | |
804 align_functions = 32; | |
805 | |
806 /* Validate PCC_STRUCT_RETURN. */ | |
807 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) | |
808 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); | |
809 | |
810 /* Only use .uaxword when compiling for a 64-bit target. */ | |
811 if (!TARGET_ARCH64) | |
812 targetm.asm_out.unaligned_op.di = NULL; | |
813 | |
814 /* Do various machine dependent initializations. */ | |
815 sparc_init_modes (); | |
816 | |
817 /* Acquire unique alias sets for our private stuff. */ | |
818 sparc_sr_alias_set = new_alias_set (); | |
819 struct_value_alias_set = new_alias_set (); | |
820 | |
821 /* Set up function hooks. */ | |
822 init_machine_status = sparc_init_machine_status; | |
823 | |
824 switch (sparc_cpu) | |
825 { | |
826 case PROCESSOR_V7: | |
827 case PROCESSOR_CYPRESS: | |
828 sparc_costs = &cypress_costs; | |
829 break; | |
830 case PROCESSOR_V8: | |
831 case PROCESSOR_SPARCLITE: | |
832 case PROCESSOR_SUPERSPARC: | |
833 sparc_costs = &supersparc_costs; | |
834 break; | |
835 case PROCESSOR_F930: | |
836 case PROCESSOR_F934: | |
837 case PROCESSOR_HYPERSPARC: | |
838 case PROCESSOR_SPARCLITE86X: | |
839 sparc_costs = &hypersparc_costs; | |
840 break; | |
841 case PROCESSOR_SPARCLET: | |
842 case PROCESSOR_TSC701: | |
843 sparc_costs = &sparclet_costs; | |
844 break; | |
845 case PROCESSOR_V9: | |
846 case PROCESSOR_ULTRASPARC: | |
847 sparc_costs = &ultrasparc_costs; | |
848 break; | |
849 case PROCESSOR_ULTRASPARC3: | |
850 sparc_costs = &ultrasparc3_costs; | |
851 break; | |
852 case PROCESSOR_NIAGARA: | |
853 sparc_costs = &niagara_costs; | |
854 break; | |
855 case PROCESSOR_NIAGARA2: | |
856 sparc_costs = &niagara2_costs; | |
857 break; | |
858 }; | |
859 | |
860 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 | |
861 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) | |
862 target_flags |= MASK_LONG_DOUBLE_128; | |
863 #endif | |
864 | |
865 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) | |
866 set_param_value ("simultaneous-prefetches", | |
867 ((sparc_cpu == PROCESSOR_ULTRASPARC | |
868 || sparc_cpu == PROCESSOR_NIAGARA | |
869 || sparc_cpu == PROCESSOR_NIAGARA2) | |
870 ? 2 | |
871 : (sparc_cpu == PROCESSOR_ULTRASPARC3 | |
872 ? 8 : 3))); | |
873 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) | |
874 set_param_value ("l1-cache-line-size", | |
875 ((sparc_cpu == PROCESSOR_ULTRASPARC | |
876 || sparc_cpu == PROCESSOR_ULTRASPARC3 | |
877 || sparc_cpu == PROCESSOR_NIAGARA | |
878 || sparc_cpu == PROCESSOR_NIAGARA2) | |
879 ? 64 : 32)); | |
880 } | |
881 | |
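/* Illustrative worked example, not part of the original file: with
   "-m64 -mcpu=ultrasparc3" the function above ends up with MASK_V9 set
   (and MASK_V8/MASK_SPARCLET/MASK_SPARCLITE cleared), sparc_costs pointing
   to ultrasparc3_costs, align_functions defaulted to 32, and the params
   "simultaneous-prefetches" and "l1-cache-line-size" set to 8 and 64.  */
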
882 #ifdef SUBTARGET_ATTRIBUTE_TABLE | |
883 /* Table of valid machine attributes. */ | |
884 const struct attribute_spec sparc_attribute_table[] = | |
885 { | |
886 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ | |
887 SUBTARGET_ATTRIBUTE_TABLE, | |
888 { NULL, 0, 0, false, false, false, NULL } | |
889 }; | |
890 #endif | |
891 | |
892 /* Miscellaneous utilities. */ | |
893 | |
894 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move | |
895 or branch on register contents instructions. */ | |
896 | |
897 int | |
898 v9_regcmp_p (enum rtx_code code) | |
899 { | |
900 return (code == EQ || code == NE || code == GE || code == LT | |
901 || code == LE || code == GT); | |
902 } | |
903 | |
904 /* Nonzero if OP is a floating point constant which can | |
905 be loaded into an integer register using a single | |
906 sethi instruction. */ | |
907 | |
908 int | |
909 fp_sethi_p (rtx op) | |
910 { | |
911 if (GET_CODE (op) == CONST_DOUBLE) | |
912 { | |
913 REAL_VALUE_TYPE r; | |
914 long i; | |
915 | |
916 REAL_VALUE_FROM_CONST_DOUBLE (r, op); | |
917 REAL_VALUE_TO_TARGET_SINGLE (r, i); | |
918 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); | |
919 } | |
920 | |
921 return 0; | |
922 } | |
923 | |
924 /* Nonzero if OP is a floating point constant which can | |
925 be loaded into an integer register using a single | |
926 mov instruction. */ | |
927 | |
928 int | |
929 fp_mov_p (rtx op) | |
930 { | |
931 if (GET_CODE (op) == CONST_DOUBLE) | |
932 { | |
933 REAL_VALUE_TYPE r; | |
934 long i; | |
935 | |
936 REAL_VALUE_FROM_CONST_DOUBLE (r, op); | |
937 REAL_VALUE_TO_TARGET_SINGLE (r, i); | |
938 return SPARC_SIMM13_P (i); | |
939 } | |
940 | |
941 return 0; | |
942 } | |
943 | |
944 /* Nonzero if OP is a floating point constant which can | |
945 be loaded into an integer register using a high/losum | |
946 instruction sequence. */ | |
947 | |
948 int | |
949 fp_high_losum_p (rtx op) | |
950 { | |
951 /* The constraints calling this should only be in | |
952 SFmode move insns, so any constant which cannot | |
953 be moved using a single insn will do. */ | |
954 if (GET_CODE (op) == CONST_DOUBLE) | |
955 { | |
956 REAL_VALUE_TYPE r; | |
957 long i; | |
958 | |
959 REAL_VALUE_FROM_CONST_DOUBLE (r, op); | |
960 REAL_VALUE_TO_TARGET_SINGLE (r, i); | |
961 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); | |
962 } | |
963 | |
964 return 0; | |
965 } | |
966 | |
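/* Illustrative worked example, not part of the original file, using the
   IEEE single-precision bit patterns of a few constants:
     1.5f -> 0x3fc00000: low 10 bits clear but too big for a simm13,
                         so fp_sethi_p is nonzero (a single sethi).
     0.0f -> 0x00000000: fits in a simm13, so fp_mov_p is nonzero.
     1.1f -> 0x3f8ccccd: neither, so fp_high_losum_p is nonzero
                         (a sethi/or pair).  */
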
967 /* Expand a move instruction. Return true if all work is done. */ | |
968 | |
969 bool | |
970 sparc_expand_move (enum machine_mode mode, rtx *operands) | |
971 { | |
972 /* Handle sets of MEM first. */ | |
973 if (GET_CODE (operands[0]) == MEM) | |
974 { | |
975 /* 0 is a register (or a pair of registers) on SPARC. */ | |
976 if (register_or_zero_operand (operands[1], mode)) | |
977 return false; | |
978 | |
979 if (!reload_in_progress) | |
980 { | |
981 operands[0] = validize_mem (operands[0]); | |
982 operands[1] = force_reg (mode, operands[1]); | |
983 } | |
984 } | |
985 | |
986 /* Fixup TLS cases. */ | |
987 if (TARGET_HAVE_TLS | |
988 && CONSTANT_P (operands[1]) | |
989 && GET_CODE (operands[1]) != HIGH | |
990 && sparc_tls_referenced_p (operands [1])) | |
991 { | |
992 rtx sym = operands[1]; | |
993 rtx addend = NULL; | |
994 | |
995 if (GET_CODE (sym) == CONST && GET_CODE (XEXP (sym, 0)) == PLUS) | |
996 { | |
997 addend = XEXP (XEXP (sym, 0), 1); | |
998 sym = XEXP (XEXP (sym, 0), 0); | |
999 } | |
1000 | |
1001 gcc_assert (SPARC_SYMBOL_REF_TLS_P (sym)); | |
1002 | |
1003 sym = legitimize_tls_address (sym); | |
1004 if (addend) | |
1005 { | |
1006 sym = gen_rtx_PLUS (mode, sym, addend); | |
1007 sym = force_operand (sym, operands[0]); | |
1008 } | |
1009 operands[1] = sym; | |
1010 } | |
1011 | |
1012 /* Fixup PIC cases. */ | |
1013 if (flag_pic && CONSTANT_P (operands[1])) | |
1014 { | |
1015 if (pic_address_needs_scratch (operands[1])) | |
1016 operands[1] = legitimize_pic_address (operands[1], mode, 0); | |
1017 | |
1018 /* VxWorks does not impose a fixed gap between segments; the run-time | |
1019 gap can be different from the object-file gap. We therefore can't | |
1020 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we | |
1021 are absolutely sure that X is in the same segment as the GOT. | |
1022 Unfortunately, the flexibility of linker scripts means that we | |
1023 can't be sure of that in general, so assume that _G_O_T_-relative | |
1024 accesses are never valid on VxWorks. */ | |
1025 if (GET_CODE (operands[1]) == LABEL_REF && !TARGET_VXWORKS_RTP) | |
1026 { | |
1027 if (mode == SImode) | |
1028 { | |
1029 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); | |
1030 return true; | |
1031 } | |
1032 | |
1033 if (mode == DImode) | |
1034 { | |
1035 gcc_assert (TARGET_ARCH64); | |
1036 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); | |
1037 return true; | |
1038 } | |
1039 } | |
1040 | |
1041 if (symbolic_operand (operands[1], mode)) | |
1042 { | |
1043 operands[1] = legitimize_pic_address (operands[1], | |
1044 mode, | |
1045 (reload_in_progress ? | |
1046 operands[0] : | |
1047 NULL_RTX)); | |
1048 return false; | |
1049 } | |
1050 } | |
1051 | |
1052 /* If we are trying to toss an integer constant into FP registers, | |
1053 or loading a FP or vector constant, force it into memory. */ | |
1054 if (CONSTANT_P (operands[1]) | |
1055 && REG_P (operands[0]) | |
1056 && (SPARC_FP_REG_P (REGNO (operands[0])) | |
1057 || SCALAR_FLOAT_MODE_P (mode) | |
1058 || VECTOR_MODE_P (mode))) | |
1059 { | |
1060 /* emit_group_store will send such bogosity to us when it is | |
1061 not storing directly into memory. So fix this up to avoid | |
1062 crashes in output_constant_pool. */ | |
1063 if (operands [1] == const0_rtx) | |
1064 operands[1] = CONST0_RTX (mode); | |
1065 | |
1066 /* We can clear FP registers if TARGET_VIS, and always other regs. */ | |
1067 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) | |
1068 && const_zero_operand (operands[1], mode)) | |
1069 return false; | |
1070 | |
1071 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG | |
1072 /* We are able to build any SF constant in integer registers | |
1073 with at most 2 instructions. */ | |
1074 && (mode == SFmode | |
1075 /* And any DF constant in integer registers. */ | |
1076 || (mode == DFmode | |
1077 && (reload_completed || reload_in_progress)))) | |
1078 return false; | |
1079 | |
1080 operands[1] = force_const_mem (mode, operands[1]); | |
1081 if (!reload_in_progress) | |
1082 operands[1] = validize_mem (operands[1]); | |
1083 return false; | |
1084 } | |
1085 | |
1086 /* Accept non-constants and valid constants unmodified. */ | |
1087 if (!CONSTANT_P (operands[1]) | |
1088 || GET_CODE (operands[1]) == HIGH | |
1089 || input_operand (operands[1], mode)) | |
1090 return false; | |
1091 | |
1092 switch (mode) | |
1093 { | |
1094 case QImode: | |
1095 /* All QImode constants require only one insn, so proceed. */ | |
1096 break; | |
1097 | |
1098 case HImode: | |
1099 case SImode: | |
1100 sparc_emit_set_const32 (operands[0], operands[1]); | |
1101 return true; | |
1102 | |
1103 case DImode: | |
1104 /* input_operand should have filtered out 32-bit mode. */ | |
1105 sparc_emit_set_const64 (operands[0], operands[1]); | |
1106 return true; | |
1107 | |
1108 default: | |
1109 gcc_unreachable (); | |
1110 } | |
1111 | |
1112 return false; | |
1113 } | |
1114 | |
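/* Illustrative sketch, not part of the original file: the move expanders in
   sparc.md are expected to invoke this helper and stop expanding once it has
   emitted the move itself, conceptually
       if (sparc_expand_move (SImode, operands)) DONE;
   whereas a false return lets the named pattern emit the plain SET.  */
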
1115 /* Load OP1, a 32-bit constant, into OP0, a register. | |
1116 We know it can't be done in one insn when we get | |
1117 here, the move expander guarantees this. */ | |
1118 | |
1119 void | |
1120 sparc_emit_set_const32 (rtx op0, rtx op1) | |
1121 { | |
1122 enum machine_mode mode = GET_MODE (op0); | |
1123 rtx temp; | |
1124 | |
1125 if (reload_in_progress || reload_completed) | |
1126 temp = op0; | |
1127 else | |
1128 temp = gen_reg_rtx (mode); | |
1129 | |
1130 if (GET_CODE (op1) == CONST_INT) | |
1131 { | |
1132 gcc_assert (!small_int_operand (op1, mode) | |
1133 && !const_high_operand (op1, mode)); | |
1134 | |
1135 /* Emit them as real moves instead of a HIGH/LO_SUM, | |
1136 this way CSE can see everything and reuse intermediate | |
1137 values if it wants. */ | |
1138 emit_insn (gen_rtx_SET (VOIDmode, temp, | |
1139 GEN_INT (INTVAL (op1) | |
1140 & ~(HOST_WIDE_INT)0x3ff))); | |
1141 | |
1142 emit_insn (gen_rtx_SET (VOIDmode, | |
1143 op0, | |
1144 gen_rtx_IOR (mode, temp, | |
1145 GEN_INT (INTVAL (op1) & 0x3ff)))); | |
1146 } | |
1147 else | |
1148 { | |
1149 /* A symbol, emit in the traditional way. */ | |
1150 emit_insn (gen_rtx_SET (VOIDmode, temp, | |
1151 gen_rtx_HIGH (mode, op1))); | |
1152 emit_insn (gen_rtx_SET (VOIDmode, | |
1153 op0, gen_rtx_LO_SUM (mode, temp, op1))); | |
1154 } | |
1155 } | |
1156 | |
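/* Illustrative worked example, not part of the original file: for
   op1 == 0x12345678 (neither a simm13 nor a sethi-only constant) the code
   above emits
     temp = 0x12345400;      (op1 & ~0x3ff, matched as a sethi)
     op0  = temp | 0x278;    (op1 & 0x3ff, matched as an or)
   which reassembles the full 32-bit value.  */
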
1157 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. | |
1158 If TEMP is nonzero, we are forbidden to use any other scratch | |
1159 registers. Otherwise, we are allowed to generate them as needed. | |
1160 | |
1161 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY | |
1162 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ | |
1163 | |
1164 void | |
1165 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) | |
1166 { | |
1167 rtx temp1, temp2, temp3, temp4, temp5; | |
1168 rtx ti_temp = 0; | |
1169 | |
1170 if (temp && GET_MODE (temp) == TImode) | |
1171 { | |
1172 ti_temp = temp; | |
1173 temp = gen_rtx_REG (DImode, REGNO (temp)); | |
1174 } | |
1175 | |
1176 /* SPARC-V9 code-model support. */ | |
1177 switch (sparc_cmodel) | |
1178 { | |
1179 case CM_MEDLOW: | |
1180 /* The range spanned by all instructions in the object is less | |
1181 than 2^31 bytes (2GB) and the distance from any instruction | |
1182 to the location of the label _GLOBAL_OFFSET_TABLE_ is less | |
1183 than 2^31 bytes (2GB). | |
1184 | |
1185 The executable must be in the low 4TB of the virtual address | |
1186 space. | |
1187 | |
1188 sethi %hi(symbol), %temp1 | |
1189 or %temp1, %lo(symbol), %reg */ | |
1190 if (temp) | |
1191 temp1 = temp; /* op0 is allowed. */ | |
1192 else | |
1193 temp1 = gen_reg_rtx (DImode); | |
1194 | |
1195 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1))); | |
1196 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1))); | |
1197 break; | |
1198 | |
1199 case CM_MEDMID: | |
1200 /* The range spanned by all instructions in the object is less | |
1201 than 2^31 bytes (2GB) and the distance from any instruction | |
1202 to the location of the label _GLOBAL_OFFSET_TABLE_ is less | |
1203 than 2^31 bytes (2GB). | |
1204 | |
1205 The executable must be in the low 16TB of the virtual address | |
1206 space. | |
1207 | |
1208 sethi %h44(symbol), %temp1 | |
1209 or %temp1, %m44(symbol), %temp2 | |
1210 sllx %temp2, 12, %temp3 | |
1211 or %temp3, %l44(symbol), %reg */ | |
1212 if (temp) | |
1213 { | |
1214 temp1 = op0; | |
1215 temp2 = op0; | |
1216 temp3 = temp; /* op0 is allowed. */ | |
1217 } | |
1218 else | |
1219 { | |
1220 temp1 = gen_reg_rtx (DImode); | |
1221 temp2 = gen_reg_rtx (DImode); | |
1222 temp3 = gen_reg_rtx (DImode); | |
1223 } | |
1224 | |
1225 emit_insn (gen_seth44 (temp1, op1)); | |
1226 emit_insn (gen_setm44 (temp2, temp1, op1)); | |
1227 emit_insn (gen_rtx_SET (VOIDmode, temp3, | |
1228 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); | |
1229 emit_insn (gen_setl44 (op0, temp3, op1)); | |
1230 break; | |
1231 | |
1232 case CM_MEDANY: | |
1233 /* The range spanned by all instructions in the object is less | |
1234 than 2^31 bytes (2GB) and the distance from any instruction | |
1235 to the location of the label _GLOBAL_OFFSET_TABLE_ is less | |
1236 than 2^31 bytes (2GB). | |
1237 | |
1238 The executable can be placed anywhere in the virtual address | |
1239 space. | |
1240 | |
1241 sethi %hh(symbol), %temp1 | |
1242 sethi %lm(symbol), %temp2 | |
1243 or %temp1, %hm(symbol), %temp3 | |
1244 sllx %temp3, 32, %temp4 | |
1245 or %temp4, %temp2, %temp5 | |
1246 or %temp5, %lo(symbol), %reg */ | |
1247 if (temp) | |
1248 { | |
1249 /* It is possible that one of the registers we got for operands[2] | |
1250 might coincide with that of operands[0] (which is why we made | |
1251 it TImode). Pick the other one to use as our scratch. */ | |
1252 if (rtx_equal_p (temp, op0)) | |
1253 { | |
1254 gcc_assert (ti_temp); | |
1255 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); | |
1256 } | |
1257 temp1 = op0; | |
1258 temp2 = temp; /* op0 is _not_ allowed, see above. */ | |
1259 temp3 = op0; | |
1260 temp4 = op0; | |
1261 temp5 = op0; | |
1262 } | |
1263 else | |
1264 { | |
1265 temp1 = gen_reg_rtx (DImode); | |
1266 temp2 = gen_reg_rtx (DImode); | |
1267 temp3 = gen_reg_rtx (DImode); | |
1268 temp4 = gen_reg_rtx (DImode); | |
1269 temp5 = gen_reg_rtx (DImode); | |
1270 } | |
1271 | |
1272 emit_insn (gen_sethh (temp1, op1)); | |
1273 emit_insn (gen_setlm (temp2, op1)); | |
1274 emit_insn (gen_sethm (temp3, temp1, op1)); | |
1275 emit_insn (gen_rtx_SET (VOIDmode, temp4, | |
1276 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); | |
1277 emit_insn (gen_rtx_SET (VOIDmode, temp5, | |
1278 gen_rtx_PLUS (DImode, temp4, temp2))); | |
1279 emit_insn (gen_setlo (op0, temp5, op1)); | |
1280 break; | |
1281 | |
1282 case CM_EMBMEDANY: | |
1283 /* Old old old backwards compatibility kruft here. | |
1284 Essentially it is MEDLOW with a fixed 64-bit | |
1285 virtual base added to all data segment addresses. | |
1286 Text-segment stuff is computed like MEDANY, we can't | |
1287 reuse the code above because the relocation knobs | |
1288 look different. | |
1289 | |
1290 Data segment: sethi %hi(symbol), %temp1 | |
1291 add %temp1, EMBMEDANY_BASE_REG, %temp2 | |
1292 or %temp2, %lo(symbol), %reg */ | |
1293 if (data_segment_operand (op1, GET_MODE (op1))) | |
1294 { | |
1295 if (temp) | |
1296 { | |
1297 temp1 = temp; /* op0 is allowed. */ | |
1298 temp2 = op0; | |
1299 } | |
1300 else | |
1301 { | |
1302 temp1 = gen_reg_rtx (DImode); | |
1303 temp2 = gen_reg_rtx (DImode); | |
1304 } | |
1305 | |
1306 emit_insn (gen_embmedany_sethi (temp1, op1)); | |
1307 emit_insn (gen_embmedany_brsum (temp2, temp1)); | |
1308 emit_insn (gen_embmedany_losum (op0, temp2, op1)); | |
1309 } | |
1310 | |
1311 /* Text segment: sethi %uhi(symbol), %temp1 | |
1312 sethi %hi(symbol), %temp2 | |
1313 or %temp1, %ulo(symbol), %temp3 | |
1314 sllx %temp3, 32, %temp4 | |
1315 or %temp4, %temp2, %temp5 | |
1316 or %temp5, %lo(symbol), %reg */ | |
1317 else | |
1318 { | |
1319 if (temp) | |
1320 { | |
1321 /* It is possible that one of the registers we got for operands[2] | |
1322 might coincide with that of operands[0] (which is why we made | |
1323 it TImode). Pick the other one to use as our scratch. */ | |
1324 if (rtx_equal_p (temp, op0)) | |
1325 { | |
1326 gcc_assert (ti_temp); | |
1327 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); | |
1328 } | |
1329 temp1 = op0; | |
1330 temp2 = temp; /* op0 is _not_ allowed, see above. */ | |
1331 temp3 = op0; | |
1332 temp4 = op0; | |
1333 temp5 = op0; | |
1334 } | |
1335 else | |
1336 { | |
1337 temp1 = gen_reg_rtx (DImode); | |
1338 temp2 = gen_reg_rtx (DImode); | |
1339 temp3 = gen_reg_rtx (DImode); | |
1340 temp4 = gen_reg_rtx (DImode); | |
1341 temp5 = gen_reg_rtx (DImode); | |
1342 } | |
1343 | |
1344 emit_insn (gen_embmedany_textuhi (temp1, op1)); | |
1345 emit_insn (gen_embmedany_texthi (temp2, op1)); | |
1346 emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); | |
1347 emit_insn (gen_rtx_SET (VOIDmode, temp4, | |
1348 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); | |
1349 emit_insn (gen_rtx_SET (VOIDmode, temp5, | |
1350 gen_rtx_PLUS (DImode, temp4, temp2))); | |
1351 emit_insn (gen_embmedany_textlo (op0, temp5, op1)); | |
1352 } | |
1353 break; | |
1354 | |
1355 default: | |
1356 gcc_unreachable (); | |
1357 } | |
1358 } | |
1359 | |
1360 #if HOST_BITS_PER_WIDE_INT == 32 | |
1361 void | |
1362 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED) | |
1363 { | |
1364 gcc_unreachable (); | |
1365 } | |
1366 #else | |
1367 /* These avoid problems when cross compiling. If we do not | |
1368 go through all this hair then the optimizer will see | |
1369 invalid REG_EQUAL notes or in some cases none at all. */ | |
1370 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); | |
1371 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); | |
1372 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); | |
1373 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); | |
1374 | |
1375 /* The optimizer is not to assume anything about exactly | |
1376 which bits are set for a HIGH, they are unspecified. | |
1377 Unfortunately this leads to many missed optimizations | |
1378 during CSE. We mask out the non-HIGH bits so the result matches | |
1379 a plain movdi, to alleviate this problem. */ | |
1380 static rtx | |
1381 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) | |
1382 { | |
1383 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); | |
1384 } | |
1385 | |
1386 static rtx | |
1387 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) | |
1388 { | |
1389 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val)); | |
1390 } | |
1391 | |
1392 static rtx | |
1393 gen_safe_OR64 (rtx src, HOST_WIDE_INT val) | |
1394 { | |
1395 return gen_rtx_IOR (DImode, src, GEN_INT (val)); | |
1396 } | |
1397 | |
1398 static rtx | |
1399 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) | |
1400 { | |
1401 return gen_rtx_XOR (DImode, src, GEN_INT (val)); | |
1402 } | |
1403 | |
1404 /* Worker routines for 64-bit constant formation on arch64. | |
1405 One of the key things to be doing in these emissions is | |
1406 to create as many temp REGs as possible. This makes it | |
1407 possible for half-built constants to be used later when | |
1408 such values are similar to something required later on. | |
1409 Without doing this, the optimizer cannot see such | |
1410 opportunities. */ | |
1411 | |
1412 static void sparc_emit_set_const64_quick1 (rtx, rtx, | |
1413 unsigned HOST_WIDE_INT, int); | |
1414 | |
1415 static void | |
1416 sparc_emit_set_const64_quick1 (rtx op0, rtx temp, | |
1417 unsigned HOST_WIDE_INT low_bits, int is_neg) | |
1418 { | |
1419 unsigned HOST_WIDE_INT high_bits; | |
1420 | |
1421 if (is_neg) | |
1422 high_bits = (~low_bits) & 0xffffffff; | |
1423 else | |
1424 high_bits = low_bits; | |
1425 | |
1426 emit_insn (gen_safe_HIGH64 (temp, high_bits)); | |
1427 if (!is_neg) | |
1428 { | |
1429 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1430 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); | |
1431 } | |
1432 else | |
1433 { | |
1434 /* If we are XOR'ing with -1, then we should emit a one's complement | |
1435 instead. This way the combiner will notice logical operations | |
1436 such as ANDN later on and substitute. */ | |
1437 if ((low_bits & 0x3ff) == 0x3ff) | |
1438 { | |
1439 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1440 gen_rtx_NOT (DImode, temp))); | |
1441 } | |
1442 else | |
1443 { | |
1444 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1445 gen_safe_XOR64 (temp, | |
1446 (-(HOST_WIDE_INT)0x400 | |
1447 | (low_bits & 0x3ff))))); | |
1448 } | |
1449 } | |
1450 } | |
1451 | |
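/* Illustrative worked example, not part of the original file: for the
   sign-extended constant 0xffffffff80001234 the is_neg path above gets
     low_bits  == 0x80001234
     high_bits == ~low_bits & 0xffffffff == 0x7fffedcb
   and emits
     temp = 0x7fffec00;                     (HIGH of high_bits)
     op0  = temp ^ 0xfffffffffffffe34;      (-0x400 | (low_bits & 0x3ff))
   and 0x7fffec00 ^ 0xfffffffffffffe34 == 0xffffffff80001234.  */
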
1452 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, | |
1453 unsigned HOST_WIDE_INT, int); | |
1454 | |
1455 static void | |
1456 sparc_emit_set_const64_quick2 (rtx op0, rtx temp, | |
1457 unsigned HOST_WIDE_INT high_bits, | |
1458 unsigned HOST_WIDE_INT low_immediate, | |
1459 int shift_count) | |
1460 { | |
1461 rtx temp2 = op0; | |
1462 | |
1463 if ((high_bits & 0xfffffc00) != 0) | |
1464 { | |
1465 emit_insn (gen_safe_HIGH64 (temp, high_bits)); | |
1466 if ((high_bits & ~0xfffffc00) != 0) | |
1467 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1468 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); | |
1469 else | |
1470 temp2 = temp; | |
1471 } | |
1472 else | |
1473 { | |
1474 emit_insn (gen_safe_SET64 (temp, high_bits)); | |
1475 temp2 = temp; | |
1476 } | |
1477 | |
1478 /* Now shift it up into place. */ | |
1479 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1480 gen_rtx_ASHIFT (DImode, temp2, | |
1481 GEN_INT (shift_count)))); | |
1482 | |
1483 /* If there is a low immediate part piece, finish up by | |
1484 putting that in as well. */ | |
1485 if (low_immediate != 0) | |
1486 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1487 gen_safe_OR64 (op0, low_immediate))); | |
1488 } | |
1489 | |
1490 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, | |
1491 unsigned HOST_WIDE_INT); | |
1492 | |
1493 /* Full 64-bit constant decomposition. Even though this is the | |
1494 'worst' case, we still optimize a few things away. */ | |
1495 static void | |
1496 sparc_emit_set_const64_longway (rtx op0, rtx temp, | |
1497 unsigned HOST_WIDE_INT high_bits, | |
1498 unsigned HOST_WIDE_INT low_bits) | |
1499 { | |
1500 rtx sub_temp; | |
1501 | |
1502 if (reload_in_progress || reload_completed) | |
1503 sub_temp = op0; | |
1504 else | |
1505 sub_temp = gen_reg_rtx (DImode); | |
1506 | |
1507 if ((high_bits & 0xfffffc00) != 0) | |
1508 { | |
1509 emit_insn (gen_safe_HIGH64 (temp, high_bits)); | |
1510 if ((high_bits & ~0xfffffc00) != 0) | |
1511 emit_insn (gen_rtx_SET (VOIDmode, | |
1512 sub_temp, | |
1513 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); | |
1514 else | |
1515 sub_temp = temp; | |
1516 } | |
1517 else | |
1518 { | |
1519 emit_insn (gen_safe_SET64 (temp, high_bits)); | |
1520 sub_temp = temp; | |
1521 } | |
1522 | |
1523 if (!reload_in_progress && !reload_completed) | |
1524 { | |
1525 rtx temp2 = gen_reg_rtx (DImode); | |
1526 rtx temp3 = gen_reg_rtx (DImode); | |
1527 rtx temp4 = gen_reg_rtx (DImode); | |
1528 | |
1529 emit_insn (gen_rtx_SET (VOIDmode, temp4, | |
1530 gen_rtx_ASHIFT (DImode, sub_temp, | |
1531 GEN_INT (32)))); | |
1532 | |
1533 emit_insn (gen_safe_HIGH64 (temp2, low_bits)); | |
1534 if ((low_bits & ~0xfffffc00) != 0) | |
1535 { | |
1536 emit_insn (gen_rtx_SET (VOIDmode, temp3, | |
1537 gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); | |
1538 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1539 gen_rtx_PLUS (DImode, temp4, temp3))); | |
1540 } | |
1541 else | |
1542 { | |
1543 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1544 gen_rtx_PLUS (DImode, temp4, temp2))); | |
1545 } | |
1546 } | |
1547 else | |
1548 { | |
1549 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); | |
1550 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); | |
1551 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); | |
1552 int to_shift = 12; | |
1553 | |
1554 /* We are in the middle of reload, so this is really | |
1555 painful. However we do still make an attempt to | |
1556 avoid emitting truly stupid code. */ | |
1557 if (low1 != const0_rtx) | |
1558 { | |
1559 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1560 gen_rtx_ASHIFT (DImode, sub_temp, | |
1561 GEN_INT (to_shift)))); | |
1562 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1563 gen_rtx_IOR (DImode, op0, low1))); | |
1564 sub_temp = op0; | |
1565 to_shift = 12; | |
1566 } | |
1567 else | |
1568 { | |
1569 to_shift += 12; | |
1570 } | |
1571 if (low2 != const0_rtx) | |
1572 { | |
1573 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1574 gen_rtx_ASHIFT (DImode, sub_temp, | |
1575 GEN_INT (to_shift)))); | |
1576 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1577 gen_rtx_IOR (DImode, op0, low2))); | |
1578 sub_temp = op0; | |
1579 to_shift = 8; | |
1580 } | |
1581 else | |
1582 { | |
1583 to_shift += 8; | |
1584 } | |
1585 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1586 gen_rtx_ASHIFT (DImode, sub_temp, | |
1587 GEN_INT (to_shift)))); | |
1588 if (low3 != const0_rtx) | |
1589 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1590 gen_rtx_IOR (DImode, op0, low3))); | |
1591 /* phew... */ | |
1592 } | |
1593 } | |
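
/* A rough illustration of the decomposition above outside of reload,
   again with made-up numbers: for 0x123456789abcdef0 the emitted
   sequence is essentially

	sethi	%hi(0x12345678), %t1	! 0x12345400
	or	%t1, 0x278, %t1		! 0x12345678
	sllx	%t1, 32, %t4		! 0x1234567800000000
	sethi	%hi(0x9abcdef0), %t2	! 0x9abcdc00
	or	%t2, 0x2f0, %t3		! 0x9abcdef0
	add	%t4, %t3, %dest		! 0x123456789abcdef0

   i.e. a sethi/or pair for each half, one shift and a final add.  The
   in-reload path instead rebuilds the low 32 bits into the destination
   register with shift/ior steps of 12, 12 and 8 bits.  */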
1594 | |
1595 /* Analyze a 64-bit constant for certain properties. */ | |
1596 static void analyze_64bit_constant (unsigned HOST_WIDE_INT, | |
1597 unsigned HOST_WIDE_INT, | |
1598 int *, int *, int *); | |
1599 | |
1600 static void | |
1601 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, | |
1602 unsigned HOST_WIDE_INT low_bits, | |
1603 int *hbsp, int *lbsp, int *abbasp) | |
1604 { | |
1605 int lowest_bit_set, highest_bit_set, all_bits_between_are_set; | |
1606 int i; | |
1607 | |
1608 lowest_bit_set = highest_bit_set = -1; | |
1609 i = 0; | |
1610 do | |
1611 { | |
1612 if ((lowest_bit_set == -1) | |
1613 && ((low_bits >> i) & 1)) | |
1614 lowest_bit_set = i; | |
1615 if ((highest_bit_set == -1) | |
1616 && ((high_bits >> (32 - i - 1)) & 1)) | |
1617 highest_bit_set = (64 - i - 1); | |
1618 } | |
1619 while (++i < 32 | |
1620 && ((highest_bit_set == -1) | |
1621 || (lowest_bit_set == -1))); | |
1622 if (i == 32) | |
1623 { | |
1624 i = 0; | |
1625 do | |
1626 { | |
1627 if ((lowest_bit_set == -1) | |
1628 && ((high_bits >> i) & 1)) | |
1629 lowest_bit_set = i + 32; | |
1630 if ((highest_bit_set == -1) | |
1631 && ((low_bits >> (32 - i - 1)) & 1)) | |
1632 highest_bit_set = 32 - i - 1; | |
1633 } | |
1634 while (++i < 32 | |
1635 && ((highest_bit_set == -1) | |
1636 || (lowest_bit_set == -1))); | |
1637 } | |
1638 /* If there are no bits set this should have gone out | |
1639 as one instruction! */ | |
1640 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); | |
1641 all_bits_between_are_set = 1; | |
1642 for (i = lowest_bit_set; i <= highest_bit_set; i++) | |
1643 { | |
1644 if (i < 32) | |
1645 { | |
1646 if ((low_bits & (1 << i)) != 0) | |
1647 continue; | |
1648 } | |
1649 else | |
1650 { | |
1651 if ((high_bits & (1 << (i - 32))) != 0) | |
1652 continue; | |
1653 } | |
1654 all_bits_between_are_set = 0; | |
1655 break; | |
1656 } | |
1657 *hbsp = highest_bit_set; | |
1658 *lbsp = lowest_bit_set; | |
1659 *abbasp = all_bits_between_are_set; | |
1660 } | |
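
#if 0
/* A minimal sketch (not used anywhere) of the same analysis done on a
   single 64-bit quantity, assuming 'unsigned long long' is 64 bits
   wide; it only illustrates what the two 32-bit loops above compute.  */
static void
analyze_64bit_constant_sketch (unsigned long long val,
			       int *lowest, int *highest, int *all_set)
{
  int i;

  *lowest = *highest = -1;
  for (i = 0; i < 64; i++)
    if ((val >> i) & 1)
      {
	if (*lowest == -1)
	  *lowest = i;		/* first (lowest) set bit */
	*highest = i;		/* last (highest) set bit so far */
      }

  /* Every bit between the lowest and the highest set bit is set iff
     adding (1 << lowest) to the value clears all of its set bits.  */
  *all_set = (*lowest != -1
	      && ((val + (1ULL << *lowest)) & val) == 0);
}
#endif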
1661 | |
1662 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); | |
1663 | |
1664 static int | |
1665 const64_is_2insns (unsigned HOST_WIDE_INT high_bits, | |
1666 unsigned HOST_WIDE_INT low_bits) | |
1667 { | |
1668 int highest_bit_set, lowest_bit_set, all_bits_between_are_set; | |
1669 | |
1670 if (high_bits == 0 | |
1671 || high_bits == 0xffffffff) | |
1672 return 1; | |
1673 | |
1674 analyze_64bit_constant (high_bits, low_bits, | |
1675 &highest_bit_set, &lowest_bit_set, | |
1676 &all_bits_between_are_set); | |
1677 | |
1678 if ((highest_bit_set == 63 | |
1679 || lowest_bit_set == 0) | |
1680 && all_bits_between_are_set != 0) | |
1681 return 1; | |
1682 | |
1683 if ((highest_bit_set - lowest_bit_set) < 21) | |
1684 return 1; | |
1685 | |
1686 return 0; | |
1687 } | |
1688 | |
1689 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, | |
1690 unsigned HOST_WIDE_INT, | |
1691 int, int); | |
1692 | |
1693 static unsigned HOST_WIDE_INT | |
1694 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, | |
1695 unsigned HOST_WIDE_INT low_bits, | |
1696 int lowest_bit_set, int shift) | |
1697 { | |
1698 HOST_WIDE_INT hi, lo; | |
1699 | |
1700 if (lowest_bit_set < 32) | |
1701 { | |
1702 lo = (low_bits >> lowest_bit_set) << shift; | |
1703 hi = ((high_bits << (32 - lowest_bit_set)) << shift); | |
1704 } | |
1705 else | |
1706 { | |
1707 lo = 0; | |
1708 hi = ((high_bits >> (lowest_bit_set - 32)) << shift); | |
1709 } | |
1710 gcc_assert (! (hi & lo)); | |
1711 return (hi | lo); | |
1712 } | |
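
/* For example (values purely illustrative): the constant
   0x0000001ffff00000 has lowest_bit_set == 20, so with shift == 0 the
   function returns ((0xfff00000 >> 20) | (0x0000001f << 12)), i.e.
   0x1ffff: the window of set bits moved down to start at bit 0.  */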
1713 | |
1714 /* Here we are sure to be arch64 and this is an integer constant | |
1715 being loaded into a register. Emit the most efficient | |
1716 insn sequence possible. Detection of all the 1-insn cases | |
1717 has been done already. */ | |
1718 void | |
1719 sparc_emit_set_const64 (rtx op0, rtx op1) | |
1720 { | |
1721 unsigned HOST_WIDE_INT high_bits, low_bits; | |
1722 int lowest_bit_set, highest_bit_set; | |
1723 int all_bits_between_are_set; | |
1724 rtx temp = 0; | |
1725 | |
1726 /* Sanity check that we know what we are working with. */ | |
1727 gcc_assert (TARGET_ARCH64 | |
1728 && (GET_CODE (op0) == SUBREG | |
1729 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); | |
1730 | |
1731 if (reload_in_progress || reload_completed) | |
1732 temp = op0; | |
1733 | |
1734 if (GET_CODE (op1) != CONST_INT) | |
1735 { | |
1736 sparc_emit_set_symbolic_const64 (op0, op1, temp); | |
1737 return; | |
1738 } | |
1739 | |
1740 if (! temp) | |
1741 temp = gen_reg_rtx (DImode); | |
1742 | |
1743 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); | |
1744 low_bits = (INTVAL (op1) & 0xffffffff); | |
1745 | |
1746 /* low_bits bits 0 --> 31 | |
1747 high_bits bits 32 --> 63 */ | |
1748 | |
1749 analyze_64bit_constant (high_bits, low_bits, | |
1750 &highest_bit_set, &lowest_bit_set, | |
1751 &all_bits_between_are_set); | |
1752 | |
1753 /* First try for a 2-insn sequence. */ | |
1754 | |
1755 /* These situations are preferred because the optimizer can | |
1756 * do more things with them: | |
1757 * 1) mov -1, %reg | |
1758 * sllx %reg, shift, %reg | |
1759 * 2) mov -1, %reg | |
1760 * srlx %reg, shift, %reg | |
1761 * 3) mov some_small_const, %reg | |
1762 * sllx %reg, shift, %reg | |
1763 */ | |
1764 if (((highest_bit_set == 63 | |
1765 || lowest_bit_set == 0) | |
1766 && all_bits_between_are_set != 0) | |
1767 || ((highest_bit_set - lowest_bit_set) < 12)) | |
1768 { | |
1769 HOST_WIDE_INT the_const = -1; | |
1770 int shift = lowest_bit_set; | |
1771 | |
1772 if ((highest_bit_set != 63 | |
1773 && lowest_bit_set != 0) | |
1774 || all_bits_between_are_set == 0) | |
1775 { | |
1776 the_const = | |
1777 create_simple_focus_bits (high_bits, low_bits, | |
1778 lowest_bit_set, 0); | |
1779 } | |
1780 else if (lowest_bit_set == 0) | |
1781 shift = -(63 - highest_bit_set); | |
1782 | |
1783 gcc_assert (SPARC_SIMM13_P (the_const)); | |
1784 gcc_assert (shift != 0); | |
1785 | |
1786 emit_insn (gen_safe_SET64 (temp, the_const)); | |
1787 if (shift > 0) | |
1788 emit_insn (gen_rtx_SET (VOIDmode, | |
1789 op0, | |
1790 gen_rtx_ASHIFT (DImode, | |
1791 temp, | |
1792 GEN_INT (shift)))); | |
1793 else if (shift < 0) | |
1794 emit_insn (gen_rtx_SET (VOIDmode, | |
1795 op0, | |
1796 gen_rtx_LSHIFTRT (DImode, | |
1797 temp, | |
1798 GEN_INT (-shift)))); | |
1799 return; | |
1800 } | |
1801 | |
1802 /* Now a range of 22 or less bits set somewhere. | |
1803 * 1) sethi %hi(focus_bits), %reg | |
1804 * sllx %reg, shift, %reg | |
1805 * 2) sethi %hi(focus_bits), %reg | |
1806 * srlx %reg, shift, %reg | |
1807 */ | |
1808 if ((highest_bit_set - lowest_bit_set) < 21) | |
1809 { | |
1810 unsigned HOST_WIDE_INT focus_bits = | |
1811 create_simple_focus_bits (high_bits, low_bits, | |
1812 lowest_bit_set, 10); | |
1813 | |
1814 gcc_assert (SPARC_SETHI_P (focus_bits)); | |
1815 gcc_assert (lowest_bit_set != 10); | |
1816 | |
1817 emit_insn (gen_safe_HIGH64 (temp, focus_bits)); | |
1818 | |
1819 /* If lowest_bit_set == 10 then a sethi alone could have done it. */ | |
1820 if (lowest_bit_set < 10) | |
1821 emit_insn (gen_rtx_SET (VOIDmode, | |
1822 op0, | |
1823 gen_rtx_LSHIFTRT (DImode, temp, | |
1824 GEN_INT (10 - lowest_bit_set)))); | |
1825 else if (lowest_bit_set > 10) | |
1826 emit_insn (gen_rtx_SET (VOIDmode, | |
1827 op0, | |
1828 gen_rtx_ASHIFT (DImode, temp, | |
1829 GEN_INT (lowest_bit_set - 10)))); | |
1830 return; | |
1831 } | |
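
/* To make the case above concrete (example numbers only): for the
   constant 0x00000003ffffc000 the set bits span bits 14..33, focus_bits
   is 0x3ffffc00, and the emitted sequence is roughly

	sethi	%hi(0x3ffffc00), %tmp	! 0x3ffffc00
	sllx	%tmp, 4, %dest		! 0x00000003ffffc000
*/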
1832 | |
1833 /* 1) sethi %hi(low_bits), %reg | |
1834 * or %reg, %lo(low_bits), %reg | |
1835 * 2) sethi %hi(~low_bits), %reg | |
1836 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg | |
1837 */ | |
1838 if (high_bits == 0 | |
1839 || high_bits == 0xffffffff) | |
1840 { | |
1841 sparc_emit_set_const64_quick1 (op0, temp, low_bits, | |
1842 (high_bits == 0xffffffff)); | |
1843 return; | |
1844 } | |
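
/* As an illustration of sequence 2) above (the constant is only an
   example): loading 0xffffffff80001234 gives high_bits == 0xffffffff
   and low_bits == 0x80001234, so sparc_emit_set_const64_quick1 emits
   roughly

	sethi	%hi(0x7fffedcb), %tmp	! ~low_bits -> %tmp = 0x7fffec00
	xor	%tmp, -0x1cc, %dest	! -0x1cc == -0x400 | 0x234

   the sign-extended simm13 makes the xor set bits 63..32, restore bits
   31..10 of low_bits and merge in its low 10 bits, leaving
   0xffffffff80001234 in %dest.  */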
1845 | |
1846 /* Now, try 3-insn sequences. */ | |
1847 | |
1848 /* 1) sethi %hi(high_bits), %reg | |
1849 * or %reg, %lo(high_bits), %reg | |
1850 * sllx %reg, 32, %reg | |
1851 */ | |
1852 if (low_bits == 0) | |
1853 { | |
1854 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); | |
1855 return; | |
1856 } | |
1857 | |
1858 /* We may be able to do something quick | |
1859 when the constant is negated, so try that. */ | |
1860 if (const64_is_2insns ((~high_bits) & 0xffffffff, | |
1861 (~low_bits) & 0xfffffc00)) | |
1862 { | |
1863 /* NOTE: The trailing bits get XOR'd so we need the | |
1864 non-negated bits, not the negated ones. */ | |
1865 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; | |
1866 | |
1867 if ((((~high_bits) & 0xffffffff) == 0 | |
1868 && ((~low_bits) & 0x80000000) == 0) | |
1869 || (((~high_bits) & 0xffffffff) == 0xffffffff | |
1870 && ((~low_bits) & 0x80000000) != 0)) | |
1871 { | |
1872 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff); | |
1873 | |
1874 if ((SPARC_SETHI_P (fast_int) | |
1875 && (~high_bits & 0xffffffff) == 0) | |
1876 || SPARC_SIMM13_P (fast_int)) | |
1877 emit_insn (gen_safe_SET64 (temp, fast_int)); | |
1878 else | |
1879 sparc_emit_set_const64 (temp, GEN_INT (fast_int)); | |
1880 } | |
1881 else | |
1882 { | |
1883 rtx negated_const; | |
1884 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) | | |
1885 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32)); | |
1886 sparc_emit_set_const64 (temp, negated_const); | |
1887 } | |
1888 | |
1889 /* If we are XOR'ing with -1, then we should emit a one's complement | |
1890 instead. This way the combiner will notice logical operations | |
1891 such as ANDN later on and substitute. */ | |
1892 if (trailing_bits == 0x3ff) | |
1893 { | |
1894 emit_insn (gen_rtx_SET (VOIDmode, op0, | |
1895 gen_rtx_NOT (DImode, temp))); | |
1896 } | |
1897 else | |
1898 { | |
1899 emit_insn (gen_rtx_SET (VOIDmode, | |
1900 op0, | |
1901 gen_safe_XOR64 (temp, | |
1902 (-0x400 | trailing_bits)))); | |
1903 } | |
1904 return; | |
1905 } | |
1906 | |
1907 /* 1) sethi %hi(xxx), %reg | |
1908 * or %reg, %lo(xxx), %reg | |
1909 * sllx %reg, yyy, %reg | |
1910 * | |
1911 * ??? This is just a generalized version of the low_bits==0 | |
1912 * thing above, FIXME... | |
1913 */ | |
1914 if ((highest_bit_set - lowest_bit_set) < 32) | |
1915 { | |
1916 unsigned HOST_WIDE_INT focus_bits = | |
1917 create_simple_focus_bits (high_bits, low_bits, | |
1918 lowest_bit_set, 0); | |
1919 | |
1920 /* We can't get here in this state. */ | |
1921 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32); | |
1922 | |
1923 /* So what we know is that the set bits straddle the | |
1924 middle of the 64-bit word. */ | |
1925 sparc_emit_set_const64_quick2 (op0, temp, | |
1926 focus_bits, 0, | |
1927 lowest_bit_set); | |
1928 return; | |
1929 } | |
1930 | |
1931 /* 1) sethi %hi(high_bits), %reg | |
1932 * or %reg, %lo(high_bits), %reg | |
1933 * sllx %reg, 32, %reg | |
1934 * or %reg, low_bits, %reg | |
1935 */ | |
1936 if (SPARC_SIMM13_P(low_bits) | |
1937 && ((int)low_bits > 0)) | |
1938 { | |
1939 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32); | |
1940 return; | |
1941 } | |
1942 | |
1943 /* The easiest way when all else fails, is full decomposition. */ | |
1944 #if 0 | |
1945 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n", | |
1946 high_bits, low_bits, ~high_bits, ~low_bits); | |
1947 #endif | |
1948 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits); | |
1949 } | |
1950 #endif /* HOST_BITS_PER_WIDE_INT == 32 */ | |
1951 | |
1952 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, | |
1953 return the mode to be used for the comparison. For floating-point, | |
1954 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand | |
1955 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special | |
1956 processing is needed. */ | |
1957 | |
1958 enum machine_mode | |
1959 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED) | |
1960 { | |
1961 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) | |
1962 { | |
1963 switch (op) | |
1964 { | |
1965 case EQ: | |
1966 case NE: | |
1967 case UNORDERED: | |
1968 case ORDERED: | |
1969 case UNLT: | |
1970 case UNLE: | |
1971 case UNGT: | |
1972 case UNGE: | |
1973 case UNEQ: | |
1974 case LTGT: | |
1975 return CCFPmode; | |
1976 | |
1977 case LT: | |
1978 case LE: | |
1979 case GT: | |
1980 case GE: | |
1981 return CCFPEmode; | |
1982 | |
1983 default: | |
1984 gcc_unreachable (); | |
1985 } | |
1986 } | |
1987 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS | |
1988 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT) | |
1989 { | |
1990 if (TARGET_ARCH64 && GET_MODE (x) == DImode) | |
1991 return CCX_NOOVmode; | |
1992 else | |
1993 return CC_NOOVmode; | |
1994 } | |
1995 else | |
1996 { | |
1997 if (TARGET_ARCH64 && GET_MODE (x) == DImode) | |
1998 return CCXmode; | |
1999 else | |
2000 return CCmode; | |
2001 } | |
2002 } | |
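
/* For instance, an integer comparison whose first operand is a PLUS,
   such as (compare (plus a b) (const_int 0)) in SImode, gets
   CC_NOOVmode, an ordered floating-point LT gets CCFPEmode, and an
   unordered test such as UNEQ gets CCFPmode.  */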
2003 | |
2004 /* Emit the compare insn and return the CC reg for a CODE comparison. */ | |
2005 | |
2006 rtx | |
2007 gen_compare_reg (enum rtx_code code) | |
2008 { | |
2009 rtx x = sparc_compare_op0; | |
2010 rtx y = sparc_compare_op1; | |
2011 enum machine_mode mode = SELECT_CC_MODE (code, x, y); | |
2012 rtx cc_reg; | |
2013 | |
2014 if (sparc_compare_emitted != NULL_RTX) | |
2015 { | |
2016 cc_reg = sparc_compare_emitted; | |
2017 sparc_compare_emitted = NULL_RTX; | |
2018 return cc_reg; | |
2019 } | |
2020 | |
2021 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the | |
2022 fcc regs (cse can't tell they're really call clobbered regs and will | |
2023 remove a duplicate comparison even if there is an intervening function | |
2024 call - it will then try to reload the cc reg via an int reg which is why | |
2025 we need the movcc patterns). It is possible to provide the movcc | |
2026 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two | |
2027 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be | |
2028 to tell cse that CCFPE mode registers (even pseudos) are call | |
2029 clobbered. */ | |
2030 | |
2031 /* ??? This is an experiment. Rather than making changes to cse which may | |
2032 or may not be easy/clean, we do our own cse. This is possible because | |
2033 we will generate hard registers. Cse knows they're call clobbered (it | |
2034 doesn't know the same thing about pseudos). If we guess wrong, no big | |
2035 deal, but if we win, great! */ | |
2036 | |
2037 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) | |
2038 #if 1 /* experiment */ | |
2039 { | |
2040 int reg; | |
2041 /* We cycle through the registers to ensure they're all exercised. */ | |
2042 static int next_fcc_reg = 0; | |
2043 /* Previous x,y for each fcc reg. */ | |
2044 static rtx prev_args[4][2]; | |
2045 | |
2046 /* Scan prev_args for x,y. */ | |
2047 for (reg = 0; reg < 4; reg++) | |
2048 if (prev_args[reg][0] == x && prev_args[reg][1] == y) | |
2049 break; | |
2050 if (reg == 4) | |
2051 { | |
2052 reg = next_fcc_reg; | |
2053 prev_args[reg][0] = x; | |
2054 prev_args[reg][1] = y; | |
2055 next_fcc_reg = (next_fcc_reg + 1) & 3; | |
2056 } | |
2057 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG); | |
2058 } | |
2059 #else | |
2060 cc_reg = gen_reg_rtx (mode); | |
2061 #endif /* ! experiment */ | |
2062 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) | |
2063 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG); | |
2064 else | |
2065 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG); | |
2066 | |
2067 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this | |
2068 will only result in an unrecognizable insn so no point in asserting. */ | |
2069 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y))); | |
2070 | |
2071 return cc_reg; | |
2072 } | |
2073 | |
2074 /* Same as above but return the whole compare operator. */ | |
2075 | |
2076 rtx | |
2077 gen_compare_operator (enum rtx_code code) | |
2078 { | |
2079 rtx cc_reg; | |
2080 | |
2081 if (GET_MODE (sparc_compare_op0) == TFmode && !TARGET_HARD_QUAD) | |
2082 code | |
2083 = sparc_emit_float_lib_cmp (sparc_compare_op0, sparc_compare_op1, code); | |
2084 | |
2085 cc_reg = gen_compare_reg (code); | |
2086 return gen_rtx_fmt_ee (code, GET_MODE (cc_reg), cc_reg, const0_rtx); | |
2087 } | |
2088 | |
2089 /* This function is used for v9 only. | |
2090 CODE is the code for an Scc's comparison. | |
2091 OPERANDS[0] is the target of the Scc insn. | |
2092 OPERANDS[1] is the value we compare against const0_rtx (which hasn't | |
2093 been generated yet). | |
2094 | |
2095 This function is needed to turn | |
2096 | |
2097 (set (reg:SI 110) | |
2098 (gt (reg:CCX 100 %icc) | |
2099 (const_int 0))) | |
2100 into | |
2101 (set (reg:SI 110) | |
2102 (gt:DI (reg:CCX 100 %icc) | |
2103 (const_int 0))) | |
2104 | |
2105 That is, the instruction recognizer needs to see the mode of the comparison to | |
2106 find the right instruction. We could use "gt:DI" right in the | |
2107 define_expand, but leaving it out allows us to handle DI, SI, etc. | |
2108 | |
2109 We refer to the global sparc compare operands sparc_compare_op0 and | |
2110 sparc_compare_op1. */ | |
2111 | |
2112 int | |
2113 gen_v9_scc (enum rtx_code compare_code, register rtx *operands) | |
2114 { | |
2115 if (! TARGET_ARCH64 | |
2116 && (GET_MODE (sparc_compare_op0) == DImode | |
2117 || GET_MODE (operands[0]) == DImode)) | |
2118 return 0; | |
2119 | |
2120 /* Try to use the movrCC insns. */ | |
2121 if (TARGET_ARCH64 | |
2122 && GET_MODE_CLASS (GET_MODE (sparc_compare_op0)) == MODE_INT | |
2123 && sparc_compare_op1 == const0_rtx | |
2124 && v9_regcmp_p (compare_code)) | |
2125 { | |
2126 rtx op0 = sparc_compare_op0; | |
2127 rtx temp; | |
2128 | |
2129 /* Special case for op0 != 0. This can be done with one instruction if | |
2130 operands[0] == sparc_compare_op0. */ | |
2131 | |
2132 if (compare_code == NE | |
2133 && GET_MODE (operands[0]) == DImode | |
2134 && rtx_equal_p (op0, operands[0])) | |
2135 { | |
2136 emit_insn (gen_rtx_SET (VOIDmode, operands[0], | |
2137 gen_rtx_IF_THEN_ELSE (DImode, | |
2138 gen_rtx_fmt_ee (compare_code, DImode, | |
2139 op0, const0_rtx), | |
2140 const1_rtx, | |
2141 operands[0]))); | |
2142 return 1; | |
2143 } | |
2144 | |
2145 if (reg_overlap_mentioned_p (operands[0], op0)) | |
2146 { | |
2147 /* Handle the case where operands[0] == sparc_compare_op0. | |
2148 We "early clobber" the result. */ | |
2149 op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0)); | |
2150 emit_move_insn (op0, sparc_compare_op0); | |
2151 } | |
2152 | |
2153 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx)); | |
2154 if (GET_MODE (op0) != DImode) | |
2155 { | |
2156 temp = gen_reg_rtx (DImode); | |
2157 convert_move (temp, op0, 0); | |
2158 } | |
2159 else | |
2160 temp = op0; | |
2161 emit_insn (gen_rtx_SET (VOIDmode, operands[0], | |
2162 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), | |
2163 gen_rtx_fmt_ee (compare_code, DImode, | |
2164 temp, const0_rtx), | |
2165 const1_rtx, | |
2166 operands[0]))); | |
2167 return 1; | |
2168 } | |
2169 else | |
2170 { | |
2171 operands[1] = gen_compare_reg (compare_code); | |
2172 | |
2173 switch (GET_MODE (operands[1])) | |
2174 { | |
2175 case CCmode : | |
2176 case CCXmode : | |
2177 case CCFPEmode : | |
2178 case CCFPmode : | |
2179 break; | |
2180 default : | |
2181 gcc_unreachable (); | |
2182 } | |
2183 emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx)); | |
2184 emit_insn (gen_rtx_SET (VOIDmode, operands[0], | |
2185 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), | |
2186 gen_rtx_fmt_ee (compare_code, | |
2187 GET_MODE (operands[1]), | |
2188 operands[1], const0_rtx), | |
2189 const1_rtx, operands[0]))); | |
2190 return 1; | |
2191 } | |
2192 } | |
2193 | |
2194 /* Emit a conditional jump insn for the v9 architecture using comparison code | |
2195 CODE and jump target LABEL. | |
2196 This function exists to take advantage of the v9 brxx insns. */ | |
2197 | |
2198 void | |
2199 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label) | |
2200 { | |
2201 gcc_assert (sparc_compare_emitted == NULL_RTX); | |
2202 emit_jump_insn (gen_rtx_SET (VOIDmode, | |
2203 pc_rtx, | |
2204 gen_rtx_IF_THEN_ELSE (VOIDmode, | |
2205 gen_rtx_fmt_ee (code, GET_MODE (op0), | |
2206 op0, const0_rtx), | |
2207 gen_rtx_LABEL_REF (VOIDmode, label), | |
2208 pc_rtx))); | |
2209 } | |
2210 | |
2211 /* Generate a DFmode part of a hard TFmode register. | |
2212 REG is the TFmode hard register, LOW is 1 for the | |
2213 low 64 bits of the register and 0 otherwise. | |
2214 */ | |
2215 rtx | |
2216 gen_df_reg (rtx reg, int low) | |
2217 { | |
2218 int regno = REGNO (reg); | |
2219 | |
2220 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0)) | |
2221 regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2; | |
2222 return gen_rtx_REG (DFmode, regno); | |
2223 } | |
2224 | |
2225 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value. | |
2226 Unlike normal calls, TFmode operands are passed by reference. It is | |
2227 assumed that no more than 3 operands are required. */ | |
2228 | |
2229 static void | |
2230 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands) | |
2231 { | |
2232 rtx ret_slot = NULL, arg[3], func_sym; | |
2233 int i; | |
2234 | |
2235 /* We only expect to be called for conversions, unary, and binary ops. */ | |
2236 gcc_assert (nargs == 2 || nargs == 3); | |
2237 | |
2238 for (i = 0; i < nargs; ++i) | |
2239 { | |
2240 rtx this_arg = operands[i]; | |
2241 rtx this_slot; | |
2242 | |
2243 /* TFmode arguments and return values are passed by reference. */ | |
2244 if (GET_MODE (this_arg) == TFmode) | |
2245 { | |
2246 int force_stack_temp; | |
2247 | |
2248 force_stack_temp = 0; | |
2249 if (TARGET_BUGGY_QP_LIB && i == 0) | |
2250 force_stack_temp = 1; | |
2251 | |
2252 if (GET_CODE (this_arg) == MEM | |
2253 && ! force_stack_temp) | |
2254 this_arg = XEXP (this_arg, 0); | |
2255 else if (CONSTANT_P (this_arg) | |
2256 && ! force_stack_temp) | |
2257 { | |
2258 this_slot = force_const_mem (TFmode, this_arg); | |
2259 this_arg = XEXP (this_slot, 0); | |
2260 } | |
2261 else | |
2262 { | |
2263 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0); | |
2264 | |
2265 /* Operand 0 is the return value. We'll copy it out later. */ | |
2266 if (i > 0) | |
2267 emit_move_insn (this_slot, this_arg); | |
2268 else | |
2269 ret_slot = this_slot; | |
2270 | |
2271 this_arg = XEXP (this_slot, 0); | |
2272 } | |
2273 } | |
2274 | |
2275 arg[i] = this_arg; | |
2276 } | |
2277 | |
2278 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name); | |
2279 | |
2280 if (GET_MODE (operands[0]) == TFmode) | |
2281 { | |
2282 if (nargs == 2) | |
2283 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2, | |
2284 arg[0], GET_MODE (arg[0]), | |
2285 arg[1], GET_MODE (arg[1])); | |
2286 else | |
2287 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3, | |
2288 arg[0], GET_MODE (arg[0]), | |
2289 arg[1], GET_MODE (arg[1]), | |
2290 arg[2], GET_MODE (arg[2])); | |
2291 | |
2292 if (ret_slot) | |
2293 emit_move_insn (operands[0], ret_slot); | |
2294 } | |
2295 else | |
2296 { | |
2297 rtx ret; | |
2298 | |
2299 gcc_assert (nargs == 2); | |
2300 | |
2301 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL, | |
2302 GET_MODE (operands[0]), 1, | |
2303 arg[1], GET_MODE (arg[1])); | |
2304 | |
2305 if (ret != operands[0]) | |
2306 emit_move_insn (operands[0], ret); | |
2307 } | |
2308 } | |
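
/* As an illustration: without TARGET_HARD_QUAD a TFmode addition thus
   becomes a call of (roughly) the form _Qp_add (&dest, &op1, &op2),
   i.e. the TFmode result and both TFmode operands are passed by
   reference, while non-TFmode values still go by value.  */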
2309 | |
2310 /* Expand soft-float TFmode calls to the SPARC ABI routines. */ | |
2311 | |
2312 static void | |
2313 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands) | |
2314 { | |
2315 const char *func; | |
2316 | |
2317 switch (code) | |
2318 { | |
2319 case PLUS: | |
2320 func = "_Qp_add"; | |
2321 break; | |
2322 case MINUS: | |
2323 func = "_Qp_sub"; | |
2324 break; | |
2325 case MULT: | |
2326 func = "_Qp_mul"; | |
2327 break; | |
2328 case DIV: | |
2329 func = "_Qp_div"; | |
2330 break; | |
2331 default: | |
2332 gcc_unreachable (); | |
2333 } | |
2334 | |
2335 emit_soft_tfmode_libcall (func, 3, operands); | |
2336 } | |
2337 | |
2338 static void | |
2339 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands) | |
2340 { | |
2341 const char *func; | |
2342 | |
2343 gcc_assert (code == SQRT); | |
2344 func = "_Qp_sqrt"; | |
2345 | |
2346 emit_soft_tfmode_libcall (func, 2, operands); | |
2347 } | |
2348 | |
2349 static void | |
2350 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands) | |
2351 { | |
2352 const char *func; | |
2353 | |
2354 switch (code) | |
2355 { | |
2356 case FLOAT_EXTEND: | |
2357 switch (GET_MODE (operands[1])) | |
2358 { | |
2359 case SFmode: | |
2360 func = "_Qp_stoq"; | |
2361 break; | |
2362 case DFmode: | |
2363 func = "_Qp_dtoq"; | |
2364 break; | |
2365 default: | |
2366 gcc_unreachable (); | |
2367 } | |
2368 break; | |
2369 | |
2370 case FLOAT_TRUNCATE: | |
2371 switch (GET_MODE (operands[0])) | |
2372 { | |
2373 case SFmode: | |
2374 func = "_Qp_qtos"; | |
2375 break; | |
2376 case DFmode: | |
2377 func = "_Qp_qtod"; | |
2378 break; | |
2379 default: | |
2380 gcc_unreachable (); | |
2381 } | |
2382 break; | |
2383 | |
2384 case FLOAT: | |
2385 switch (GET_MODE (operands[1])) | |
2386 { | |
2387 case SImode: | |
2388 func = "_Qp_itoq"; | |
2389 if (TARGET_ARCH64) | |
2390 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]); | |
2391 break; | |
2392 case DImode: | |
2393 func = "_Qp_xtoq"; | |
2394 break; | |
2395 default: | |
2396 gcc_unreachable (); | |
2397 } | |
2398 break; | |
2399 | |
2400 case UNSIGNED_FLOAT: | |
2401 switch (GET_MODE (operands[1])) | |
2402 { | |
2403 case SImode: | |
2404 func = "_Qp_uitoq"; | |
2405 if (TARGET_ARCH64) | |
2406 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]); | |
2407 break; | |
2408 case DImode: | |
2409 func = "_Qp_uxtoq"; | |
2410 break; | |
2411 default: | |
2412 gcc_unreachable (); | |
2413 } | |
2414 break; | |
2415 | |
2416 case FIX: | |
2417 switch (GET_MODE (operands[0])) | |
2418 { | |
2419 case SImode: | |
2420 func = "_Qp_qtoi"; | |
2421 break; | |
2422 case DImode: | |
2423 func = "_Qp_qtox"; | |
2424 break; | |
2425 default: | |
2426 gcc_unreachable (); | |
2427 } | |
2428 break; | |
2429 | |
2430 case UNSIGNED_FIX: | |
2431 switch (GET_MODE (operands[0])) | |
2432 { | |
2433 case SImode: | |
2434 func = "_Qp_qtoui"; | |
2435 break; | |
2436 case DImode: | |
2437 func = "_Qp_qtoux"; | |
2438 break; | |
2439 default: | |
2440 gcc_unreachable (); | |
2441 } | |
2442 break; | |
2443 | |
2444 default: | |
2445 gcc_unreachable (); | |
2446 } | |
2447 | |
2448 emit_soft_tfmode_libcall (func, 2, operands); | |
2449 } | |
2450 | |
2451 /* Expand a hard-float TFmode operation. All arguments must be in | |
2452 registers. */ | |
2453 | |
2454 static void | |
2455 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands) | |
2456 { | |
2457 rtx op, dest; | |
2458 | |
2459 if (GET_RTX_CLASS (code) == RTX_UNARY) | |
2460 { | |
2461 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); | |
2462 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]); | |
2463 } | |
2464 else | |
2465 { | |
2466 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); | |
2467 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); | |
2468 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]), | |
2469 operands[1], operands[2]); | |
2470 } | |
2471 | |
2472 if (register_operand (operands[0], VOIDmode)) | |
2473 dest = operands[0]; | |
2474 else | |
2475 dest = gen_reg_rtx (GET_MODE (operands[0])); | |
2476 | |
2477 emit_insn (gen_rtx_SET (VOIDmode, dest, op)); | |
2478 | |
2479 if (dest != operands[0]) | |
2480 emit_move_insn (operands[0], dest); | |
2481 } | |
2482 | |
2483 void | |
2484 emit_tfmode_binop (enum rtx_code code, rtx *operands) | |
2485 { | |
2486 if (TARGET_HARD_QUAD) | |
2487 emit_hard_tfmode_operation (code, operands); | |
2488 else | |
2489 emit_soft_tfmode_binop (code, operands); | |
2490 } | |
2491 | |
2492 void | |
2493 emit_tfmode_unop (enum rtx_code code, rtx *operands) | |
2494 { | |
2495 if (TARGET_HARD_QUAD) | |
2496 emit_hard_tfmode_operation (code, operands); | |
2497 else | |
2498 emit_soft_tfmode_unop (code, operands); | |
2499 } | |
2500 | |
2501 void | |
2502 emit_tfmode_cvt (enum rtx_code code, rtx *operands) | |
2503 { | |
2504 if (TARGET_HARD_QUAD) | |
2505 emit_hard_tfmode_operation (code, operands); | |
2506 else | |
2507 emit_soft_tfmode_cvt (code, operands); | |
2508 } | |
2509 | |
2510 /* Return nonzero if a branch/jump/call instruction will be emitting | |
2511 a nop into its delay slot. */ | |
2512 | |
2513 int | |
2514 empty_delay_slot (rtx insn) | |
2515 { | |
2516 rtx seq; | |
2517 | |
2518 /* If there is no previous instruction (should not happen), return true. */ | |
2519 if (PREV_INSN (insn) == NULL) | |
2520 return 1; | |
2521 | |
2522 seq = NEXT_INSN (PREV_INSN (insn)); | |
2523 if (GET_CODE (PATTERN (seq)) == SEQUENCE) | |
2524 return 0; | |
2525 | |
2526 return 1; | |
2527 } | |
2528 | |
2529 /* Return nonzero if TRIAL can go into the call delay slot. */ | |
2530 | |
2531 int | |
2532 tls_call_delay (rtx trial) | |
2533 { | |
2534 rtx pat; | |
2535 | |
2536 /* Binutils allows | |
2537 call __tls_get_addr, %tgd_call (foo) | |
2538 add %l7, %o0, %o0, %tgd_add (foo) | |
2539 while Sun as/ld does not. */ | |
2540 if (TARGET_GNU_TLS || !TARGET_TLS) | |
2541 return 1; | |
2542 | |
2543 pat = PATTERN (trial); | |
2544 | |
2545 /* We must reject tgd_add{32|64}, i.e. | |
2546 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD))) | |
2547 and tldm_add{32|64}, i.e. | |
2548 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM))) | |
2549 for Sun as/ld. */ | |
2550 if (GET_CODE (pat) == SET | |
2551 && GET_CODE (SET_SRC (pat)) == PLUS) | |
2552 { | |
2553 rtx unspec = XEXP (SET_SRC (pat), 1); | |
2554 | |
2555 if (GET_CODE (unspec) == UNSPEC | |
2556 && (XINT (unspec, 1) == UNSPEC_TLSGD | |
2557 || XINT (unspec, 1) == UNSPEC_TLSLDM)) | |
2558 return 0; | |
2559 } | |
2560 | |
2561 return 1; | |
2562 } | |
2563 | |
2564 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore' | |
2565 instruction. RETURN_P is true if the v9 variant 'return' is to be | |
2566 considered in the test too. | |
2567 | |
2568 TRIAL must be a SET whose destination is a REG appropriate for the | |
2569 'restore' instruction or, if RETURN_P is true, for the 'return' | |
2570 instruction. */ | |
2571 | |
2572 static int | |
2573 eligible_for_restore_insn (rtx trial, bool return_p) | |
2574 { | |
2575 rtx pat = PATTERN (trial); | |
2576 rtx src = SET_SRC (pat); | |
2577 | |
2578 /* The 'restore src,%g0,dest' pattern for word mode and below. */ | |
2579 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT | |
2580 && arith_operand (src, GET_MODE (src))) | |
2581 { | |
2582 if (TARGET_ARCH64) | |
2583 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); | |
2584 else | |
2585 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode); | |
2586 } | |
2587 | |
2588 /* The 'restore src,%g0,dest' pattern for double-word mode. */ | |
2589 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT | |
2590 && arith_double_operand (src, GET_MODE (src))) | |
2591 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); | |
2592 | |
2593 /* The 'restore src,%g0,dest' pattern for float if no FPU. */ | |
2594 else if (! TARGET_FPU && register_operand (src, SFmode)) | |
2595 return 1; | |
2596 | |
2597 /* The 'restore src,%g0,dest' pattern for double if no FPU. */ | |
2598 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode)) | |
2599 return 1; | |
2600 | |
2601 /* If we have the 'return' instruction, anything that does not use | |
2602 local or output registers and can go into a delay slot wins. */ | |
2603 else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1) | |
2604 && (get_attr_in_uncond_branch_delay (trial) | |
2605 == IN_UNCOND_BRANCH_DELAY_TRUE)) | |
2606 return 1; | |
2607 | |
2608 /* The 'restore src1,src2,dest' pattern for SImode. */ | |
2609 else if (GET_CODE (src) == PLUS | |
2610 && register_operand (XEXP (src, 0), SImode) | |
2611 && arith_operand (XEXP (src, 1), SImode)) | |
2612 return 1; | |
2613 | |
2614 /* The 'restore src1,src2,dest' pattern for DImode. */ | |
2615 else if (GET_CODE (src) == PLUS | |
2616 && register_operand (XEXP (src, 0), DImode) | |
2617 && arith_double_operand (XEXP (src, 1), DImode)) | |
2618 return 1; | |
2619 | |
2620 /* The 'restore src1,%lo(src2),dest' pattern. */ | |
2621 else if (GET_CODE (src) == LO_SUM | |
2622 && ! TARGET_CM_MEDMID | |
2623 && ((register_operand (XEXP (src, 0), SImode) | |
2624 && immediate_operand (XEXP (src, 1), SImode)) | |
2625 || (TARGET_ARCH64 | |
2626 && register_operand (XEXP (src, 0), DImode) | |
2627 && immediate_operand (XEXP (src, 1), DImode)))) | |
2628 return 1; | |
2629 | |
2630 /* The 'restore src,src,dest' pattern. */ | |
2631 else if (GET_CODE (src) == ASHIFT | |
2632 && (register_operand (XEXP (src, 0), SImode) | |
2633 || register_operand (XEXP (src, 0), DImode)) | |
2634 && XEXP (src, 1) == const1_rtx) | |
2635 return 1; | |
2636 | |
2637 return 0; | |
2638 } | |
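
/* For example, an insn such as (set (reg:SI %i0) (plus (reg:SI %i1)
   (const_int 5))) matches the SImode 'restore src1,src2,dest' case
   above, so it may be placed in the return delay slot and folded into
   the restore instruction; the particular registers are only
   illustrative.  */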
2639 | |
2640 /* Return nonzero if TRIAL can go into the function return's | |
2641 delay slot. */ | |
2642 | |
2643 int | |
2644 eligible_for_return_delay (rtx trial) | |
2645 { | |
2646 rtx pat; | |
2647 | |
2648 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET) | |
2649 return 0; | |
2650 | |
2651 if (get_attr_length (trial) != 1) | |
2652 return 0; | |
2653 | |
2654 /* If there are any call-saved registers, we should scan TRIAL to check | |
2655 that it does not reference them. For now just make it easy. */ | |
2656 if (num_gfregs) | |
2657 return 0; | |
2658 | |
2659 /* If the function uses __builtin_eh_return, the eh_return machinery | |
2660 occupies the delay slot. */ | |
2661 if (crtl->calls_eh_return) | |
2662 return 0; | |
2663 | |
2664 /* In the case of a true leaf function, anything can go into the slot. */ | |
2665 if (sparc_leaf_function_p) | |
2666 return get_attr_in_uncond_branch_delay (trial) | |
2667 == IN_UNCOND_BRANCH_DELAY_TRUE; | |
2668 | |
2669 pat = PATTERN (trial); | |
2670 | |
2671 /* Otherwise, only operations which can be done in tandem with | |
2672 a `restore' or `return' insn can go into the delay slot. */ | |
2673 if (GET_CODE (SET_DEST (pat)) != REG | |
2674 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)) | |
2675 return 0; | |
2676 | |
2677 /* If this instruction sets up a floating-point register and we have a return | |
2678 instruction, it can probably go in. But restore will not work | |
2679 with FP_REGS. */ | |
2680 if (REGNO (SET_DEST (pat)) >= 32) | |
2681 return (TARGET_V9 | |
2682 && ! epilogue_renumber (&pat, 1) | |
2683 && (get_attr_in_uncond_branch_delay (trial) | |
2684 == IN_UNCOND_BRANCH_DELAY_TRUE)); | |
2685 | |
2686 return eligible_for_restore_insn (trial, true); | |
2687 } | |
2688 | |
2689 /* Return nonzero if TRIAL can go into the sibling call's | |
2690 delay slot. */ | |
2691 | |
2692 int | |
2693 eligible_for_sibcall_delay (rtx trial) | |
2694 { | |
2695 rtx pat; | |
2696 | |
2697 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET) | |
2698 return 0; | |
2699 | |
2700 if (get_attr_length (trial) != 1) | |
2701 return 0; | |
2702 | |
2703 pat = PATTERN (trial); | |
2704 | |
2705 if (sparc_leaf_function_p) | |
2706 { | |
2707 /* If the tail call is done using the call instruction, | |
2708 we have to restore %o7 in the delay slot. */ | |
2709 if (LEAF_SIBCALL_SLOT_RESERVED_P) | |
2710 return 0; | |
2711 | |
2712 /* %g1 is used to build the function address. */ | |
2713 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat)) | |
2714 return 0; | |
2715 | |
2716 return 1; | |
2717 } | |
2718 | |
2719 /* Otherwise, only operations which can be done in tandem with | |
2720 a `restore' insn can go into the delay slot. */ | |
2721 if (GET_CODE (SET_DEST (pat)) != REG | |
2722 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24) | |
2723 || REGNO (SET_DEST (pat)) >= 32) | |
2724 return 0; | |
2725 | |
2726 /* If it mentions %o7, it can't go in, because sibcall will clobber it | |
2727 in most cases. */ | |
2728 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat)) | |
2729 return 0; | |
2730 | |
2731 return eligible_for_restore_insn (trial, false); | |
2732 } | |
2733 | |
2734 int | |
2735 short_branch (int uid1, int uid2) | |
2736 { | |
2737 int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2); | |
2738 | |
2739 /* Leave a few words of "slop". */ | |
2740 if (delta >= -1023 && delta <= 1022) | |
2741 return 1; | |
2742 | |
2743 return 0; | |
2744 } | |
2745 | |
2746 /* Return nonzero if REG is not used after INSN. | |
2747 We assume REG is a reload reg, and therefore does | |
2748 not live past labels or calls or jumps. */ | |
2749 int | |
2750 reg_unused_after (rtx reg, rtx insn) | |
2751 { | |
2752 enum rtx_code code, prev_code = UNKNOWN; | |
2753 | |
2754 while ((insn = NEXT_INSN (insn))) | |
2755 { | |
2756 if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)]) | |
2757 return 1; | |
2758 | |
2759 code = GET_CODE (insn); | |
2760 if (GET_CODE (insn) == CODE_LABEL) | |
2761 return 1; | |
2762 | |
2763 if (INSN_P (insn)) | |
2764 { | |
2765 rtx set = single_set (insn); | |
2766 int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set)); | |
2767 if (set && in_src) | |
2768 return 0; | |
2769 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) | |
2770 return 1; | |
2771 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) | |
2772 return 0; | |
2773 } | |
2774 prev_code = code; | |
2775 } | |
2776 return 1; | |
2777 } | |
2778 | |
2779 /* Determine if it's legal to put X into the constant pool. This | |
2780 is not possible if X contains the address of a symbol that is | |
2781 not constant (TLS) or not known at final link time (PIC). */ | |
2782 | |
2783 static bool | |
2784 sparc_cannot_force_const_mem (rtx x) | |
2785 { | |
2786 switch (GET_CODE (x)) | |
2787 { | |
2788 case CONST_INT: | |
2789 case CONST_DOUBLE: | |
2790 case CONST_VECTOR: | |
2791 /* Accept all non-symbolic constants. */ | |
2792 return false; | |
2793 | |
2794 case LABEL_REF: | |
2795 /* Labels are OK iff we are non-PIC. */ | |
2796 return flag_pic != 0; | |
2797 | |
2798 case SYMBOL_REF: | |
2799 /* 'Naked' TLS symbol references are never OK, | |
2800 non-TLS symbols are OK iff we are non-PIC. */ | |
2801 if (SYMBOL_REF_TLS_MODEL (x)) | |
2802 return true; | |
2803 else | |
2804 return flag_pic != 0; | |
2805 | |
2806 case CONST: | |
2807 return sparc_cannot_force_const_mem (XEXP (x, 0)); | |
2808 case PLUS: | |
2809 case MINUS: | |
2810 return sparc_cannot_force_const_mem (XEXP (x, 0)) | |
2811 || sparc_cannot_force_const_mem (XEXP (x, 1)); | |
2812 case UNSPEC: | |
2813 return true; | |
2814 default: | |
2815 gcc_unreachable (); | |
2816 } | |
2817 } | |
2818 | |
2819 /* PIC support. */ | |
2820 static GTY(()) char pic_helper_symbol_name[256]; | |
2821 static GTY(()) rtx pic_helper_symbol; | |
2822 static GTY(()) bool pic_helper_emitted_p = false; | |
2823 static GTY(()) rtx global_offset_table; | |
2824 | |
2825 /* Ensure that we are not using patterns that are not OK with PIC. */ | |
2826 | |
2827 int | |
2828 check_pic (int i) | |
2829 { | |
2830 switch (flag_pic) | |
2831 { | |
2832 case 1: | |
2833 gcc_assert (GET_CODE (recog_data.operand[i]) != SYMBOL_REF | |
2834 && (GET_CODE (recog_data.operand[i]) != CONST | |
2835 || (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS | |
2836 && (XEXP (XEXP (recog_data.operand[i], 0), 0) | |
2837 == global_offset_table) | |
2838 && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1)) | |
2839 == CONST)))); | |
2840 case 2: | |
2841 default: | |
2842 return 1; | |
2843 } | |
2844 } | |
2845 | |
2846 /* Return true if X is an address which needs a temporary register when | |
2847 reloaded while generating PIC code. */ | |
2848 | |
2849 int | |
2850 pic_address_needs_scratch (rtx x) | |
2851 { | |
2852 /* An address which is a symbol plus a non-SMALL_INT offset needs a temp reg. */ | |
2853 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS | |
2854 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF | |
2855 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT | |
2856 && ! SMALL_INT (XEXP (XEXP (x, 0), 1))) | |
2857 return 1; | |
2858 | |
2859 return 0; | |
2860 } | |
2861 | |
2862 /* Determine if a given RTX is a valid constant. We already know this | |
2863 satisfies CONSTANT_P. */ | |
2864 | |
2865 bool | |
2866 legitimate_constant_p (rtx x) | |
2867 { | |
2868 rtx inner; | |
2869 | |
2870 switch (GET_CODE (x)) | |
2871 { | |
2872 case SYMBOL_REF: | |
2873 /* TLS symbols are not constant. */ | |
2874 if (SYMBOL_REF_TLS_MODEL (x)) | |
2875 return false; | |
2876 break; | |
2877 | |
2878 case CONST: | |
2879 inner = XEXP (x, 0); | |
2880 | |
2881 /* Offsets of TLS symbols are never valid. | |
2882 Discourage CSE from creating them. */ | |
2883 if (GET_CODE (inner) == PLUS | |
2884 && SPARC_SYMBOL_REF_TLS_P (XEXP (inner, 0))) | |
2885 return false; | |
2886 break; | |
2887 | |
2888 case CONST_DOUBLE: | |
2889 if (GET_MODE (x) == VOIDmode) | |
2890 return true; | |
2891 | |
2892 /* Floating point constants are generally not ok. | |
2893 The only exception is 0.0 in VIS. */ | |
2894 if (TARGET_VIS | |
2895 && SCALAR_FLOAT_MODE_P (GET_MODE (x)) | |
2896 && const_zero_operand (x, GET_MODE (x))) | |
2897 return true; | |
2898 | |
2899 return false; | |
2900 | |
2901 case CONST_VECTOR: | |
2902 /* Vector constants are generally not ok. | |
2903 The only exception is 0 in VIS. */ | |
2904 if (TARGET_VIS | |
2905 && const_zero_operand (x, GET_MODE (x))) | |
2906 return true; | |
2907 | |
2908 return false; | |
2909 | |
2910 default: | |
2911 break; | |
2912 } | |
2913 | |
2914 return true; | |
2915 } | |
2916 | |
2917 /* Determine if a given RTX is a valid constant address. */ | |
2918 | |
2919 bool | |
2920 constant_address_p (rtx x) | |
2921 { | |
2922 switch (GET_CODE (x)) | |
2923 { | |
2924 case LABEL_REF: | |
2925 case CONST_INT: | |
2926 case HIGH: | |
2927 return true; | |
2928 | |
2929 case CONST: | |
2930 if (flag_pic && pic_address_needs_scratch (x)) | |
2931 return false; | |
2932 return legitimate_constant_p (x); | |
2933 | |
2934 case SYMBOL_REF: | |
2935 return !flag_pic && legitimate_constant_p (x); | |
2936 | |
2937 default: | |
2938 return false; | |
2939 } | |
2940 } | |
2941 | |
2942 /* Nonzero if the constant value X is a legitimate general operand | |
2943 when generating PIC code. It is given that flag_pic is on and | |
2944 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ | |
2945 | |
2946 bool | |
2947 legitimate_pic_operand_p (rtx x) | |
2948 { | |
2949 if (pic_address_needs_scratch (x)) | |
2950 return false; | |
2951 if (SPARC_SYMBOL_REF_TLS_P (x) | |
2952 || (GET_CODE (x) == CONST | |
2953 && GET_CODE (XEXP (x, 0)) == PLUS | |
2954 && SPARC_SYMBOL_REF_TLS_P (XEXP (XEXP (x, 0), 0)))) | |
2955 return false; | |
2956 return true; | |
2957 } | |
2958 | |
2959 /* Return nonzero if ADDR is a valid memory address. | |
2960 STRICT specifies whether strict register checking applies. */ | |
2961 | |
2962 int | |
2963 legitimate_address_p (enum machine_mode mode, rtx addr, int strict) | |
2964 { | |
2965 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL; | |
2966 | |
2967 if (REG_P (addr) || GET_CODE (addr) == SUBREG) | |
2968 rs1 = addr; | |
2969 else if (GET_CODE (addr) == PLUS) | |
2970 { | |
2971 rs1 = XEXP (addr, 0); | |
2972 rs2 = XEXP (addr, 1); | |
2973 | |
2974 /* Canonicalize. REG comes first; if there are no regs, | |
2975 LO_SUM comes first. */ | |
2976 if (!REG_P (rs1) | |
2977 && GET_CODE (rs1) != SUBREG | |
2978 && (REG_P (rs2) | |
2979 || GET_CODE (rs2) == SUBREG | |
2980 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM))) | |
2981 { | |
2982 rs1 = XEXP (addr, 1); | |
2983 rs2 = XEXP (addr, 0); | |
2984 } | |
2985 | |
2986 if ((flag_pic == 1 | |
2987 && rs1 == pic_offset_table_rtx | |
2988 && !REG_P (rs2) | |
2989 && GET_CODE (rs2) != SUBREG | |
2990 && GET_CODE (rs2) != LO_SUM | |
2991 && GET_CODE (rs2) != MEM | |
2992 && ! SPARC_SYMBOL_REF_TLS_P (rs2) | |
2993 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode) | |
2994 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2))) | |
2995 || ((REG_P (rs1) | |
2996 || GET_CODE (rs1) == SUBREG) | |
2997 && RTX_OK_FOR_OFFSET_P (rs2))) | |
2998 { | |
2999 imm1 = rs2; | |
3000 rs2 = NULL; | |
3001 } | |
3002 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG) | |
3003 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG)) | |
3004 { | |
3005 /* We prohibit REG + REG for TFmode when there are no quad move insns | |
3006 and we consequently need to split. We do this because REG+REG | |
3007 is not an offsettable address. If we get the situation in reload | |
3008 where source and destination of a movtf pattern are both MEMs with | |
3009 REG+REG address, then only one of them gets converted to an | |
3010 offsettable address. */ | |
3011 if (mode == TFmode | |
3012 && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD)) | |
3013 return 0; | |
3014 | |
3015 /* We prohibit REG + REG on ARCH32 if not optimizing for | |
3016 DFmode/DImode because then mem_min_alignment is likely to be zero | |
3017 after reload and the forced split would lack a matching splitter | |
3018 pattern. */ | |
3019 if (TARGET_ARCH32 && !optimize | |
3020 && (mode == DFmode || mode == DImode)) | |
3021 return 0; | |
3022 } | |
3023 else if (USE_AS_OFFSETABLE_LO10 | |
3024 && GET_CODE (rs1) == LO_SUM | |
3025 && TARGET_ARCH64 | |
3026 && ! TARGET_CM_MEDMID | |
3027 && RTX_OK_FOR_OLO10_P (rs2)) | |
3028 { | |
3029 rs2 = NULL; | |
3030 imm1 = XEXP (rs1, 1); | |
3031 rs1 = XEXP (rs1, 0); | |
3032 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1)) | |
3033 return 0; | |
3034 } | |
3035 } | |
3036 else if (GET_CODE (addr) == LO_SUM) | |
3037 { | |
3038 rs1 = XEXP (addr, 0); | |
3039 imm1 = XEXP (addr, 1); | |
3040 | |
3041 if (! CONSTANT_P (imm1) || SPARC_SYMBOL_REF_TLS_P (rs1)) | |
3042 return 0; | |
3043 | |
3044 /* We can't allow TFmode in 32-bit mode, because an offset greater | |
3045 than the alignment (8) may cause the LO_SUM to overflow. */ | |
3046 if (mode == TFmode && TARGET_ARCH32) | |
3047 return 0; | |
3048 } | |
3049 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr)) | |
3050 return 1; | |
3051 else | |
3052 return 0; | |
3053 | |
3054 if (GET_CODE (rs1) == SUBREG) | |
3055 rs1 = SUBREG_REG (rs1); | |
3056 if (!REG_P (rs1)) | |
3057 return 0; | |
3058 | |
3059 if (rs2) | |
3060 { | |
3061 if (GET_CODE (rs2) == SUBREG) | |
3062 rs2 = SUBREG_REG (rs2); | |
3063 if (!REG_P (rs2)) | |
3064 return 0; | |
3065 } | |
3066 | |
3067 if (strict) | |
3068 { | |
3069 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1)) | |
3070 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2)))) | |
3071 return 0; | |
3072 } | |
3073 else | |
3074 { | |
3075 if ((REGNO (rs1) >= 32 | |
3076 && REGNO (rs1) != FRAME_POINTER_REGNUM | |
3077 && REGNO (rs1) < FIRST_PSEUDO_REGISTER) | |
3078 || (rs2 | |
3079 && (REGNO (rs2) >= 32 | |
3080 && REGNO (rs2) != FRAME_POINTER_REGNUM | |
3081 && REGNO (rs2) < FIRST_PSEUDO_REGISTER))) | |
3082 return 0; | |
3083 } | |
3084 return 1; | |
3085 } | |
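
/* For instance, (plus (reg %fp) (const_int 64)) is accepted above as a
   base-plus-offset address and (lo_sum (reg) (symbol_ref)) is accepted
   except for TFmode on 32-bit targets, whereas REG + REG is rejected
   for TFmode when no quad move insns are available; the register names
   are only illustrative.  */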
3086 | |
3087 /* Construct the SYMBOL_REF for the __tls_get_addr function. */ | |
3088 | |
3089 static GTY(()) rtx sparc_tls_symbol; | |
3090 | |
3091 static rtx | |
3092 sparc_tls_get_addr (void) | |
3093 { | |
3094 if (!sparc_tls_symbol) | |
3095 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); | |
3096 | |
3097 return sparc_tls_symbol; | |
3098 } | |
3099 | |
3100 static rtx | |
3101 sparc_tls_got (void) | |
3102 { | |
3103 rtx temp; | |
3104 if (flag_pic) | |
3105 { | |
3106 crtl->uses_pic_offset_table = 1; | |
3107 return pic_offset_table_rtx; | |
3108 } | |
3109 | |
3110 if (!global_offset_table) | |
3111 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); | |
3112 temp = gen_reg_rtx (Pmode); | |
3113 emit_move_insn (temp, global_offset_table); | |
3114 return temp; | |
3115 } | |
3116 | |
3117 /* Return 1 if *X is a thread-local symbol. */ | |
3118 | |
3119 static int | |
3120 sparc_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED) | |
3121 { | |
3122 return SPARC_SYMBOL_REF_TLS_P (*x); | |
3123 } | |
3124 | |
3125 /* Return 1 if X contains a thread-local symbol. */ | |
3126 | |
3127 bool | |
3128 sparc_tls_referenced_p (rtx x) | |
3129 { | |
3130 if (!TARGET_HAVE_TLS) | |
3131 return false; | |
3132 | |
3133 return for_each_rtx (&x, &sparc_tls_symbol_ref_1, 0); | |
3134 } | |
3135 | |
3136 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute | |
3137 this (thread-local) address. */ | |
3138 | |
3139 rtx | |
3140 legitimize_tls_address (rtx addr) | |
3141 { | |
3142 rtx temp1, temp2, temp3, ret, o0, got, insn; | |
3143 | |
3144 gcc_assert (can_create_pseudo_p ()); | |
3145 | |
3146 if (GET_CODE (addr) == SYMBOL_REF) | |
3147 switch (SYMBOL_REF_TLS_MODEL (addr)) | |
3148 { | |
3149 case TLS_MODEL_GLOBAL_DYNAMIC: | |
3150 start_sequence (); | |
3151 temp1 = gen_reg_rtx (SImode); | |
3152 temp2 = gen_reg_rtx (SImode); | |
3153 ret = gen_reg_rtx (Pmode); | |
3154 o0 = gen_rtx_REG (Pmode, 8); | |
3155 got = sparc_tls_got (); | |
3156 emit_insn (gen_tgd_hi22 (temp1, addr)); | |
3157 emit_insn (gen_tgd_lo10 (temp2, temp1, addr)); | |
3158 if (TARGET_ARCH32) | |
3159 { | |
3160 emit_insn (gen_tgd_add32 (o0, got, temp2, addr)); | |
3161 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (), | |
3162 addr, const1_rtx)); | |
3163 } | |
3164 else | |
3165 { | |
3166 emit_insn (gen_tgd_add64 (o0, got, temp2, addr)); | |
3167 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (), | |
3168 addr, const1_rtx)); | |
3169 } | |
3170 CALL_INSN_FUNCTION_USAGE (insn) | |
3171 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0), | |
3172 CALL_INSN_FUNCTION_USAGE (insn)); | |
3173 insn = get_insns (); | |
3174 end_sequence (); | |
3175 emit_libcall_block (insn, ret, o0, addr); | |
3176 break; | |
3177 | |
3178 case TLS_MODEL_LOCAL_DYNAMIC: | |
3179 start_sequence (); | |
3180 temp1 = gen_reg_rtx (SImode); | |
3181 temp2 = gen_reg_rtx (SImode); | |
3182 temp3 = gen_reg_rtx (Pmode); | |
3183 ret = gen_reg_rtx (Pmode); | |
3184 o0 = gen_rtx_REG (Pmode, 8); | |
3185 got = sparc_tls_got (); | |
3186 emit_insn (gen_tldm_hi22 (temp1)); | |
3187 emit_insn (gen_tldm_lo10 (temp2, temp1)); | |
3188 if (TARGET_ARCH32) | |
3189 { | |
3190 emit_insn (gen_tldm_add32 (o0, got, temp2)); | |
3191 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (), | |
3192 const1_rtx)); | |
3193 } | |
3194 else | |
3195 { | |
3196 emit_insn (gen_tldm_add64 (o0, got, temp2)); | |
3197 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (), | |
3198 const1_rtx)); | |
3199 } | |
3200 CALL_INSN_FUNCTION_USAGE (insn) | |
3201 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0), | |
3202 CALL_INSN_FUNCTION_USAGE (insn)); | |
3203 insn = get_insns (); | |
3204 end_sequence (); | |
3205 emit_libcall_block (insn, temp3, o0, | |
3206 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), | |
3207 UNSPEC_TLSLD_BASE)); | |
3208 temp1 = gen_reg_rtx (SImode); | |
3209 temp2 = gen_reg_rtx (SImode); | |
3210 emit_insn (gen_tldo_hix22 (temp1, addr)); | |
3211 emit_insn (gen_tldo_lox10 (temp2, temp1, addr)); | |
3212 if (TARGET_ARCH32) | |
3213 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr)); | |
3214 else | |
3215 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr)); | |
3216 break; | |
3217 | |
3218 case TLS_MODEL_INITIAL_EXEC: | |
3219 temp1 = gen_reg_rtx (SImode); | |
3220 temp2 = gen_reg_rtx (SImode); | |
3221 temp3 = gen_reg_rtx (Pmode); | |
3222 got = sparc_tls_got (); | |
3223 emit_insn (gen_tie_hi22 (temp1, addr)); | |
3224 emit_insn (gen_tie_lo10 (temp2, temp1, addr)); | |
3225 if (TARGET_ARCH32) | |
3226 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr)); | |
3227 else | |
3228 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr)); | |
3229 if (TARGET_SUN_TLS) | |
3230 { | |
3231 ret = gen_reg_rtx (Pmode); | |
3232 if (TARGET_ARCH32) | |
3233 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7), | |
3234 temp3, addr)); | |
3235 else | |
3236 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7), | |
3237 temp3, addr)); | |
3238 } | |
3239 else | |
3240 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3); | |
3241 break; | |
3242 | |
3243 case TLS_MODEL_LOCAL_EXEC: | |
3244 temp1 = gen_reg_rtx (Pmode); | |
3245 temp2 = gen_reg_rtx (Pmode); | |
3246 if (TARGET_ARCH32) | |
3247 { | |
3248 emit_insn (gen_tle_hix22_sp32 (temp1, addr)); | |
3249 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr)); | |
3250 } | |
3251 else | |
3252 { | |
3253 emit_insn (gen_tle_hix22_sp64 (temp1, addr)); | |
3254 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr)); | |
3255 } | |
3256 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2); | |
3257 break; | |
3258 | |
3259 default: | |
3260 gcc_unreachable (); | |
3261 } | |
3262 | |
3263 else | |
3264 gcc_unreachable (); /* for now ... */ | |
3265 | |
3266 return ret; | |
3267 } | |
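
/* A rough sketch of what the global-dynamic case above emits on 32-bit
   (register names illustrative; compare the binutils note in
   tls_call_delay):

	sethi	%tgd_hi22(sym), %t1
	add	%t1, %tgd_lo10(sym), %t2
	add	%l7, %t2, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   The other models follow the same hi/lo pattern with their own
   relocations; initial-exec and local-exec finish by adding the thread
   pointer %g7.  */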
3268 | |
3269 | |
3270 /* Legitimize PIC addresses. If the address is already position-independent, | |
3271 we return ORIG. Newly generated position-independent addresses go into a | |
3272 reg. This is REG if nonzero, otherwise we allocate register(s) as | |
3273 necessary. */ | |
3274 | |
3275 rtx | |
3276 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED, | |
3277 rtx reg) | |
3278 { | |
3279 if (GET_CODE (orig) == SYMBOL_REF | |
3280 /* See the comment in sparc_expand_move. */ | |
3281 || (TARGET_VXWORKS_RTP && GET_CODE (orig) == LABEL_REF)) | |
3282 { | |
3283 rtx pic_ref, address; | |
3284 rtx insn; | |
3285 | |
3286 if (reg == 0) | |
3287 { | |
3288 gcc_assert (! reload_in_progress && ! reload_completed); | |
3289 reg = gen_reg_rtx (Pmode); | |
3290 } | |
3291 | |
3292 if (flag_pic == 2) | |
3293 { | |
3294 /* If not during reload, allocate another temp reg here for loading | |
3295 in the address, so that these instructions can be optimized | |
3296 properly. */ | |
3297 rtx temp_reg = ((reload_in_progress || reload_completed) | |
3298 ? reg : gen_reg_rtx (Pmode)); | |
3299 | |
3300 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse | |
3301 won't get confused into thinking that these two instructions | |
3302 are loading in the true address of the symbol. If in the | |
3303 future a PIC rtx exists, that should be used instead. */ | |
3304 if (TARGET_ARCH64) | |
3305 { | |
3306 emit_insn (gen_movdi_high_pic (temp_reg, orig)); | |
3307 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig)); | |
3308 } | |
3309 else | |
3310 { | |
3311 emit_insn (gen_movsi_high_pic (temp_reg, orig)); | |
3312 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig)); | |
3313 } | |
3314 address = temp_reg; | |
3315 } | |
3316 else | |
3317 address = orig; | |
3318 | |
3319 pic_ref = gen_const_mem (Pmode, | |
3320 gen_rtx_PLUS (Pmode, | |
3321 pic_offset_table_rtx, address)); | |
3322 crtl->uses_pic_offset_table = 1; | |
3323 insn = emit_move_insn (reg, pic_ref); | |
3324 /* Put a REG_EQUAL note on this insn, so that it can be optimized | |
3325 by the loop optimizer. */ | |
3326 set_unique_reg_note (insn, REG_EQUAL, orig); | |
3327 return reg; | |
3328 } | |
3329 else if (GET_CODE (orig) == CONST) | |
3330 { | |
3331 rtx base, offset; | |
3332 | |
3333 if (GET_CODE (XEXP (orig, 0)) == PLUS | |
3334 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) | |
3335 return orig; | |
3336 | |
3337 if (reg == 0) | |
3338 { | |
3339 gcc_assert (! reload_in_progress && ! reload_completed); | |
3340 reg = gen_reg_rtx (Pmode); | |
3341 } | |
3342 | |
3343 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); | |
3344 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); | |
3345 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, | |
3346 base == reg ? 0 : reg); | |
3347 | |
3348 if (GET_CODE (offset) == CONST_INT) | |
3349 { | |
3350 if (SMALL_INT (offset)) | |
3351 return plus_constant (base, INTVAL (offset)); | |
3352 else if (! reload_in_progress && ! reload_completed) | |
3353 offset = force_reg (Pmode, offset); | |
3354 else | |
3355 /* If we reach here, then something is seriously wrong. */ | |
3356 gcc_unreachable (); | |
3357 } | |
3358 return gen_rtx_PLUS (Pmode, base, offset); | |
3359 } | |
3360 else if (GET_CODE (orig) == LABEL_REF) | |
3361 /* ??? Why do we do this? */ | |
3362 /* Now movsi_pic_label_ref uses it, but we ought to be checking that | |
3363 the register is live instead, in case it is eliminated. */ | |
3364 crtl->uses_pic_offset_table = 1; | |
3365 | |
3366 return orig; | |
3367 } | |
3368 | |
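/* Editor's note (illustrative sketch, not part of GCC): for flag_pic == 2 the
   code above conceptually produces

       temp_reg = <high part of sym>        -- mov*_high_pic pattern
       temp_reg = temp_reg + <low part>     -- mov*_lo_sum_pic pattern
       reg      = *(%l7 + temp_reg)         -- the gen_const_mem call, %l7
                                               being this port's PIC register

   i.e. the symbol's address is loaded from the GOT rather than materialized
   directly; the UNSPEC wrapping mentioned in the comment is what keeps CSE
   from treating the hi/lo pair as the symbol's real address.  The concrete
   assembly is defined by the patterns in sparc.md, not here.  */
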
3369 /* Try machine-dependent ways of modifying an illegitimate address X | |
3370 to be legitimate. If we find one, return the new, valid address. | |
3371 | |
3372 OLDX is the address as it was before break_out_memory_refs was called. | |
3373 In some cases it is useful to look at this to decide what needs to be done. | |
3374 | |
3375 MODE is the mode of the operand pointed to by X. */ | |
3376 | |
3377 rtx | |
3378 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) | |
3379 { | |
3380 rtx orig_x = x; | |
3381 | |
3382 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT) | |
3383 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), | |
3384 force_operand (XEXP (x, 0), NULL_RTX)); | |
3385 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT) | |
3386 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
3387 force_operand (XEXP (x, 1), NULL_RTX)); | |
3388 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS) | |
3389 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX), | |
3390 XEXP (x, 1)); | |
3391 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS) | |
3392 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
3393 force_operand (XEXP (x, 1), NULL_RTX)); | |
3394 | |
3395 if (x != orig_x && legitimate_address_p (mode, x, FALSE)) | |
3396 return x; | |
3397 | |
3398 if (SPARC_SYMBOL_REF_TLS_P (x)) | |
3399 x = legitimize_tls_address (x); | |
3400 else if (flag_pic) | |
3401 x = legitimize_pic_address (x, mode, 0); | |
3402 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1))) | |
3403 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
3404 copy_to_mode_reg (Pmode, XEXP (x, 1))); | |
3405 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0))) | |
3406 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), | |
3407 copy_to_mode_reg (Pmode, XEXP (x, 0))); | |
3408 else if (GET_CODE (x) == SYMBOL_REF | |
3409 || GET_CODE (x) == CONST | |
3410 || GET_CODE (x) == LABEL_REF) | |
3411 x = copy_to_suggested_reg (x, NULL_RTX, Pmode); | |
3412 return x; | |
3413 } | |
3414 | |
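/* Editor's note (worked example, not part of GCC): given an address of the
   form (plus (mult (reg X) (const_int 4)) (plus (reg Y) (const_int 8))),
   the first rewrite above forces the MULT into a fresh pseudo T, giving
   (plus (plus (reg Y) (const_int 8)) (reg T)); the third rewrite then forces
   the remaining inner PLUS into a register too, leaving a plain reg+reg
   address that the SPARC addressing modes accept.  The exact intermediate
   shapes depend on what force_operand returns and are shown only for
   illustration.  */
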
3415 /* Emit the special PIC helper function. */ | |
3416 | |
3417 static void | |
3418 emit_pic_helper (void) | |
3419 { | |
3420 const char *pic_name = reg_names[REGNO (pic_offset_table_rtx)]; | |
3421 int align; | |
3422 | |
3423 switch_to_section (text_section); | |
3424 | |
3425 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); | |
3426 if (align > 0) | |
3427 ASM_OUTPUT_ALIGN (asm_out_file, align); | |
3428 ASM_OUTPUT_LABEL (asm_out_file, pic_helper_symbol_name); | |
3429 if (flag_delayed_branch) | |
3430 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n", | |
3431 pic_name, pic_name); | |
3432 else | |
3433 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n", | |
3434 pic_name, pic_name); | |
3435 | |
3436 pic_helper_emitted_p = true; | |
3437 } | |
3438 | |
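/* Editor's note (illustrative, not part of GCC): the helper emitted above is
   the classic get-the-PC thunk:

       <pic_helper_symbol_name>:
               jmp   %o7+8
                add  %o7, <pic_reg>, <pic_reg>

   The load_pcrel_sym patterns used by load_pic_register below first put an
   expression of the form _GLOBAL_OFFSET_TABLE_ minus the PC of the calling
   sequence into the PIC register and then call this helper, so adding %o7
   (the address of the call) yields the absolute address of the GOT.  The
   precise operand arithmetic lives in the sparc.md patterns.  */
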
3439 /* Emit code to load the PIC register. */ | |
3440 | |
3441 static void | |
3442 load_pic_register (bool delay_pic_helper) | |
3443 { | |
3444 int orig_flag_pic = flag_pic; | |
3445 | |
3446 if (TARGET_VXWORKS_RTP) | |
3447 { | |
3448 emit_insn (gen_vxworks_load_got ()); | |
3449 emit_use (pic_offset_table_rtx); | |
3450 return; | |
3451 } | |
3452 | |
3453 /* If we haven't initialized the special PIC symbols, do so now. */ | |
3454 if (!pic_helper_symbol_name[0]) | |
3455 { | |
3456 ASM_GENERATE_INTERNAL_LABEL (pic_helper_symbol_name, "LADDPC", 0); | |
3457 pic_helper_symbol = gen_rtx_SYMBOL_REF (Pmode, pic_helper_symbol_name); | |
3458 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); | |
3459 } | |
3460 | |
3461 /* If we haven't emitted the special PIC helper function, do so now unless | |
3462 we are requested to delay it. */ | |
3463 if (!delay_pic_helper && !pic_helper_emitted_p) | |
3464 emit_pic_helper (); | |
3465 | |
3466 flag_pic = 0; | |
3467 if (TARGET_ARCH64) | |
3468 emit_insn (gen_load_pcrel_symdi (pic_offset_table_rtx, global_offset_table, | |
3469 pic_helper_symbol)); | |
3470 else | |
3471 emit_insn (gen_load_pcrel_symsi (pic_offset_table_rtx, global_offset_table, | |
3472 pic_helper_symbol)); | |
3473 flag_pic = orig_flag_pic; | |
3474 | |
3475 /* Need to emit this whether or not we obey regdecls, | |
3476 since setjmp/longjmp can cause life info to screw up. | |
3477 ??? In the case where we don't obey regdecls, this is not sufficient | |
3478 since we may not fall out the bottom. */ | |
3479 emit_use (pic_offset_table_rtx); | |
3480 } | |
3481 | |
3482 /* Emit a call instruction with the pattern given by PAT. ADDR is the | |
3483 address of the call target. */ | |
3484 | |
3485 void | |
3486 sparc_emit_call_insn (rtx pat, rtx addr) | |
3487 { | |
3488 rtx insn; | |
3489 | |
3490 insn = emit_call_insn (pat); | |
3491 | |
3492 /* The PIC register is live on entry to VxWorks PIC PLT entries. */ | |
3493 if (TARGET_VXWORKS_RTP | |
3494 && flag_pic | |
3495 && GET_CODE (addr) == SYMBOL_REF | |
3496 && (SYMBOL_REF_DECL (addr) | |
3497 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) | |
3498 : !SYMBOL_REF_LOCAL_P (addr))) | |
3499 { | |
3500 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); | |
3501 crtl->uses_pic_offset_table = 1; | |
3502 } | |
3503 } | |
3504 | |
3505 /* Return 1 if RTX is a MEM which is known to be aligned to at | |
3506 least a DESIRED byte boundary. */ | |
3507 | |
3508 int | |
3509 mem_min_alignment (rtx mem, int desired) | |
3510 { | |
3511 rtx addr, base, offset; | |
3512 | |
3513 /* If it's not a MEM we can't accept it. */ | |
3514 if (GET_CODE (mem) != MEM) | |
3515 return 0; | |
3516 | |
3517 /* If the recorded MEM_ALIGN already guarantees the desired alignment, we are done. */ | |
3518 if (!TARGET_UNALIGNED_DOUBLES | |
3519 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired) | |
3520 return 1; | |
3521 | |
3522 /* ??? The rest of the function predates MEM_ALIGN so | |
3523 there is probably a bit of redundancy. */ | |
3524 addr = XEXP (mem, 0); | |
3525 base = offset = NULL_RTX; | |
3526 if (GET_CODE (addr) == PLUS) | |
3527 { | |
3528 if (GET_CODE (XEXP (addr, 0)) == REG) | |
3529 { | |
3530 base = XEXP (addr, 0); | |
3531 | |
3532 /* What we are saying here is that if the base | |
3533 REG is aligned properly, the compiler will make | |
3534 sure any REG based index upon it will be so | |
3535 as well. */ | |
3536 if (GET_CODE (XEXP (addr, 1)) == CONST_INT) | |
3537 offset = XEXP (addr, 1); | |
3538 else | |
3539 offset = const0_rtx; | |
3540 } | |
3541 } | |
3542 else if (GET_CODE (addr) == REG) | |
3543 { | |
3544 base = addr; | |
3545 offset = const0_rtx; | |
3546 } | |
3547 | |
3548 if (base != NULL_RTX) | |
3549 { | |
3550 int regno = REGNO (base); | |
3551 | |
3552 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM) | |
3553 { | |
3554 /* Check if the compiler has recorded some information | |
3555 about the alignment of the base REG. If reload has | |
3556 completed, we already matched with proper alignments. | |
3557 If not running global_alloc, reload might give us | |
3558 an unaligned pointer to the local stack, though. */ | |
3559 if (((cfun != 0 | |
3560 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT) | |
3561 || (optimize && reload_completed)) | |
3562 && (INTVAL (offset) & (desired - 1)) == 0) | |
3563 return 1; | |
3564 } | |
3565 else | |
3566 { | |
3567 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0) | |
3568 return 1; | |
3569 } | |
3570 } | |
3571 else if (! TARGET_UNALIGNED_DOUBLES | |
3572 || CONSTANT_P (addr) | |
3573 || GET_CODE (addr) == LO_SUM) | |
3574 { | |
3575 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES | |
3576 is true, in which case we can only assume that an access is aligned if | |
3577 it is to a constant address, or the address involves a LO_SUM. */ | |
3578 return 1; | |
3579 } | |
3580 | |
3581 /* An obviously unaligned address. */ | |
3582 return 0; | |
3583 } | |
3584 | |
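/* Editor's note (worked example): with DESIRED == 8 the tests above reduce
   to checking the low three bits of the constant offset, e.g.
   (16 & (8 - 1)) == 0 is accepted while (12 & (8 - 1)) != 0 is not.  For
   %fp/%sp based addresses the same check is applied after subtracting
   SPARC_STACK_BIAS, because on 64-bit targets those registers are offset by
   the stack bias from the memory they actually address.  */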
3585 | |
3586 /* Vectors to keep interesting information about registers where it can easily | |
3587 be got. We used to use the actual mode value as the bit number, but there | |
3588 are more than 32 modes now. Instead we use two tables: one indexed by | |
3589 hard register number, and one indexed by mode. */ | |
3590 | |
3591 /* The purpose of sparc_mode_class is to shrink the range of modes so that | |
3592 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is | |
3593 mapped into one sparc_mode_class mode. */ | |
3594 | |
3595 enum sparc_mode_class { | |
3596 S_MODE, D_MODE, T_MODE, O_MODE, | |
3597 SF_MODE, DF_MODE, TF_MODE, OF_MODE, | |
3598 CC_MODE, CCFP_MODE | |
3599 }; | |
3600 | |
3601 /* Modes for single-word and smaller quantities. */ | |
3602 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) | |
3603 | |
3604 /* Modes for double-word and smaller quantities. */ | |
3605 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE)) | |
3606 | |
3607 /* Modes for quad-word and smaller quantities. */ | |
3608 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) | |
3609 | |
3610 /* Modes for 8-word and smaller quantities. */ | |
3611 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) | |
3612 | |
3613 /* Modes for single-float quantities. We must allow any single word or | |
3614 smaller quantity. This is because the fix/float conversion instructions | |
3615 take integer inputs/outputs from the float registers. */ | |
3616 #define SF_MODES (S_MODES) | |
3617 | |
3618 /* Modes for double-float and smaller quantities. */ | |
3619 #define DF_MODES (S_MODES | D_MODES) | |
3620 | |
3621 /* Modes for double-float only quantities. */ | |
3622 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE)) | |
3623 | |
3624 /* Modes for quad-float only quantities. */ | |
3625 #define TF_ONLY_MODES (1 << (int) TF_MODE) | |
3626 | |
3627 /* Modes for quad-float and smaller quantities. */ | |
3628 #define TF_MODES (DF_MODES | TF_ONLY_MODES) | |
3629 | |
3630 /* Modes for quad-float and double-float quantities. */ | |
3631 #define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES) | |
3632 | |
3633 /* Modes for quad-float pair only quantities. */ | |
3634 #define OF_ONLY_MODES (1 << (int) OF_MODE) | |
3635 | |
3636 /* Modes for quad-float pairs and smaller quantities. */ | |
3637 #define OF_MODES (TF_MODES | OF_ONLY_MODES) | |
3638 | |
3639 #define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES) | |
3640 | |
3641 /* Modes for condition codes. */ | |
3642 #define CC_MODES (1 << (int) CC_MODE) | |
3643 #define CCFP_MODES (1 << (int) CCFP_MODE) | |
3644 | |
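/* Editor's note (illustrative): sparc_mode_class[] below stores one of the
   masks defined above per machine mode, and the hard_*bit_mode_classes[]
   tables store the set of acceptable classes per hard register, so a
   register/mode pair is meant to be validated with a simple mask test along
   the lines of

       (hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0

   which is presumably what HARD_REGNO_MODE_OK in sparc.h boils down to; the
   macro itself is not part of this file, so treat the exact form as an
   assumption.  */
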
3645 /* Value is 1 if register/mode pair is acceptable on sparc. | |
3646 The funny mixture of D and T modes is because integer operations | |
3647 do not specially operate on tetra quantities, so non-quad-aligned | |
3648 registers can hold quadword quantities (except %o4 and %i4 because | |
3649 they cross fixed registers). */ | |
3650 | |
3651 /* This points to either the 32 bit or the 64 bit version. */ | |
3652 const int *hard_regno_mode_classes; | |
3653 | |
3654 static const int hard_32bit_mode_classes[] = { | |
3655 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, | |
3656 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, | |
3657 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, | |
3658 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, | |
3659 | |
3660 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3661 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3662 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3663 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3664 | |
3665 /* FP regs f32 to f63. Only the even numbered registers actually exist, | |
3666 and none can hold SFmode/SImode values. */ | |
3667 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3668 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3669 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3670 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3671 | |
3672 /* %fcc[0123] */ | |
3673 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, | |
3674 | |
3675 /* %icc */ | |
3676 CC_MODES | |
3677 }; | |
3678 | |
3679 static const int hard_64bit_mode_classes[] = { | |
3680 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, | |
3681 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, | |
3682 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, | |
3683 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, | |
3684 | |
3685 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3686 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3687 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3688 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, | |
3689 | |
3690 /* FP regs f32 to f63. Only the even numbered registers actually exist, | |
3691 and none can hold SFmode/SImode values. */ | |
3692 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3693 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3694 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3695 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, | |
3696 | |
3697 /* %fcc[0123] */ | |
3698 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, | |
3699 | |
3700 /* %icc */ | |
3701 CC_MODES | |
3702 }; | |
3703 | |
3704 int sparc_mode_class [NUM_MACHINE_MODES]; | |
3705 | |
3706 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; | |
3707 | |
3708 static void | |
3709 sparc_init_modes (void) | |
3710 { | |
3711 int i; | |
3712 | |
3713 for (i = 0; i < NUM_MACHINE_MODES; i++) | |
3714 { | |
3715 switch (GET_MODE_CLASS (i)) | |
3716 { | |
3717 case MODE_INT: | |
3718 case MODE_PARTIAL_INT: | |
3719 case MODE_COMPLEX_INT: | |
3720 if (GET_MODE_SIZE (i) <= 4) | |
3721 sparc_mode_class[i] = 1 << (int) S_MODE; | |
3722 else if (GET_MODE_SIZE (i) == 8) | |
3723 sparc_mode_class[i] = 1 << (int) D_MODE; | |
3724 else if (GET_MODE_SIZE (i) == 16) | |
3725 sparc_mode_class[i] = 1 << (int) T_MODE; | |
3726 else if (GET_MODE_SIZE (i) == 32) | |
3727 sparc_mode_class[i] = 1 << (int) O_MODE; | |
3728 else | |
3729 sparc_mode_class[i] = 0; | |
3730 break; | |
3731 case MODE_VECTOR_INT: | |
3732 if (GET_MODE_SIZE (i) <= 4) | |
3733 sparc_mode_class[i] = 1 << (int)SF_MODE; | |
3734 else if (GET_MODE_SIZE (i) == 8) | |
3735 sparc_mode_class[i] = 1 << (int)DF_MODE; | |
3736 break; | |
3737 case MODE_FLOAT: | |
3738 case MODE_COMPLEX_FLOAT: | |
3739 if (GET_MODE_SIZE (i) <= 4) | |
3740 sparc_mode_class[i] = 1 << (int) SF_MODE; | |
3741 else if (GET_MODE_SIZE (i) == 8) | |
3742 sparc_mode_class[i] = 1 << (int) DF_MODE; | |
3743 else if (GET_MODE_SIZE (i) == 16) | |
3744 sparc_mode_class[i] = 1 << (int) TF_MODE; | |
3745 else if (GET_MODE_SIZE (i) == 32) | |
3746 sparc_mode_class[i] = 1 << (int) OF_MODE; | |
3747 else | |
3748 sparc_mode_class[i] = 0; | |
3749 break; | |
3750 case MODE_CC: | |
3751 if (i == (int) CCFPmode || i == (int) CCFPEmode) | |
3752 sparc_mode_class[i] = 1 << (int) CCFP_MODE; | |
3753 else | |
3754 sparc_mode_class[i] = 1 << (int) CC_MODE; | |
3755 break; | |
3756 default: | |
3757 sparc_mode_class[i] = 0; | |
3758 break; | |
3759 } | |
3760 } | |
3761 | |
3762 if (TARGET_ARCH64) | |
3763 hard_regno_mode_classes = hard_64bit_mode_classes; | |
3764 else | |
3765 hard_regno_mode_classes = hard_32bit_mode_classes; | |
3766 | |
3767 /* Initialize the array used by REGNO_REG_CLASS. */ | |
3768 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | |
3769 { | |
3770 if (i < 16 && TARGET_V8PLUS) | |
3771 sparc_regno_reg_class[i] = I64_REGS; | |
3772 else if (i < 32 || i == FRAME_POINTER_REGNUM) | |
3773 sparc_regno_reg_class[i] = GENERAL_REGS; | |
3774 else if (i < 64) | |
3775 sparc_regno_reg_class[i] = FP_REGS; | |
3776 else if (i < 96) | |
3777 sparc_regno_reg_class[i] = EXTRA_FP_REGS; | |
3778 else if (i < 100) | |
3779 sparc_regno_reg_class[i] = FPCC_REGS; | |
3780 else | |
3781 sparc_regno_reg_class[i] = NO_REGS; | |
3782 } | |
3783 } | |
3784 | |
3785 /* Compute the frame size required by the function. This function is called | |
3786 during the reload pass and also by sparc_expand_prologue. */ | |
3787 | |
3788 HOST_WIDE_INT | |
3789 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p) | |
3790 { | |
3791 int outgoing_args_size = (crtl->outgoing_args_size | |
3792 + REG_PARM_STACK_SPACE (current_function_decl)); | |
3793 int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */ | |
3794 int i; | |
3795 | |
3796 if (TARGET_ARCH64) | |
3797 { | |
3798 for (i = 0; i < 8; i++) | |
3799 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) | |
3800 n_regs += 2; | |
3801 } | |
3802 else | |
3803 { | |
3804 for (i = 0; i < 8; i += 2) | |
3805 if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) | |
3806 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1])) | |
3807 n_regs += 2; | |
3808 } | |
3809 | |
3810 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2) | |
3811 if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) | |
3812 || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1])) | |
3813 n_regs += 2; | |
3814 | |
3815 /* Set up values for use in prologue and epilogue. */ | |
3816 num_gfregs = n_regs; | |
3817 | |
3818 if (leaf_function_p | |
3819 && n_regs == 0 | |
3820 && size == 0 | |
3821 && crtl->outgoing_args_size == 0) | |
3822 actual_fsize = apparent_fsize = 0; | |
3823 else | |
3824 { | |
3825 /* We subtract STARTING_FRAME_OFFSET; remember it's negative. */ | |
3826 apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8; | |
3827 apparent_fsize += n_regs * 4; | |
3828 actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8); | |
3829 } | |
3830 | |
3831 /* Make sure nothing can clobber our register windows. | |
3832 If a SAVE must be done, or there is a stack-local variable, | |
3833 the register window area must be allocated. */ | |
3834 if (! leaf_function_p || size > 0) | |
3835 actual_fsize += FIRST_PARM_OFFSET (current_function_decl); | |
3836 | |
3837 return SPARC_STACK_ALIGN (actual_fsize); | |
3838 } | |
3839 | |
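/* Editor's note (worked example, values illustrative): with SIZE == 20 bytes
   of locals, n_regs == 4 saved registers, an empty outgoing-argument area
   and STARTING_FRAME_OFFSET == 0 (an assumption for this sketch), the code
   above computes apparent_fsize = ((20 + 7) & -8) + 4*4 = 24 + 16 = 40; a
   non-leaf function then adds the fixed FIRST_PARM_OFFSET register-window
   save area to actual_fsize before the final SPARC_STACK_ALIGN rounding.  */
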
3840 /* Output any necessary .register pseudo-ops. */ | |
3841 | |
3842 void | |
3843 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED) | |
3844 { | |
3845 #ifdef HAVE_AS_REGISTER_PSEUDO_OP | |
3846 int i; | |
3847 | |
3848 if (TARGET_ARCH32) | |
3849 return; | |
3850 | |
3851 /* Check if %g[2367] were used without | |
3852 .register being printed for them already. */ | |
3853 for (i = 2; i < 8; i++) | |
3854 { | |
3855 if (df_regs_ever_live_p (i) | |
3856 && ! sparc_hard_reg_printed [i]) | |
3857 { | |
3858 sparc_hard_reg_printed [i] = 1; | |
3859 /* %g7 is used as the TLS base register; use #ignore | |
3860 for it instead of #scratch. */ | |
3861 fprintf (file, "\t.register\t%%g%d, #%s\n", i, | |
3862 i == 7 ? "ignore" : "scratch"); | |
3863 } | |
3864 if (i == 3) i = 5; | |
3865 } | |
3866 #endif | |
3867 } | |
3868 | |
3869 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET | |
3870 as needed. LOW should be double-word aligned for 32-bit registers. | |
3871 Return the new OFFSET. */ | |
3872 | |
3873 #define SORR_SAVE 0 | |
3874 #define SORR_RESTORE 1 | |
3875 | |
3876 static int | |
3877 save_or_restore_regs (int low, int high, rtx base, int offset, int action) | |
3878 { | |
3879 rtx mem, insn; | |
3880 int i; | |
3881 | |
3882 if (TARGET_ARCH64 && high <= 32) | |
3883 { | |
3884 for (i = low; i < high; i++) | |
3885 { | |
3886 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) | |
3887 { | |
3888 mem = gen_rtx_MEM (DImode, plus_constant (base, offset)); | |
3889 set_mem_alias_set (mem, sparc_sr_alias_set); | |
3890 if (action == SORR_SAVE) | |
3891 { | |
3892 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i)); | |
3893 RTX_FRAME_RELATED_P (insn) = 1; | |
3894 } | |
3895 else /* action == SORR_RESTORE */ | |
3896 emit_move_insn (gen_rtx_REG (DImode, i), mem); | |
3897 offset += 8; | |
3898 } | |
3899 } | |
3900 } | |
3901 else | |
3902 { | |
3903 for (i = low; i < high; i += 2) | |
3904 { | |
3905 bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i]; | |
3906 bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]; | |
3907 enum machine_mode mode; | |
3908 int regno; | |
3909 | |
3910 if (reg0 && reg1) | |
3911 { | |
3912 mode = i < 32 ? DImode : DFmode; | |
3913 regno = i; | |
3914 } | |
3915 else if (reg0) | |
3916 { | |
3917 mode = i < 32 ? SImode : SFmode; | |
3918 regno = i; | |
3919 } | |
3920 else if (reg1) | |
3921 { | |
3922 mode = i < 32 ? SImode : SFmode; | |
3923 regno = i + 1; | |
3924 offset += 4; | |
3925 } | |
3926 else | |
3927 continue; | |
3928 | |
3929 mem = gen_rtx_MEM (mode, plus_constant (base, offset)); | |
3930 set_mem_alias_set (mem, sparc_sr_alias_set); | |
3931 if (action == SORR_SAVE) | |
3932 { | |
3933 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno)); | |
3934 RTX_FRAME_RELATED_P (insn) = 1; | |
3935 } | |
3936 else /* action == SORR_RESTORE */ | |
3937 emit_move_insn (gen_rtx_REG (mode, regno), mem); | |
3938 | |
3939 /* Always preserve double-word alignment. */ | |
3940 offset = (offset + 7) & -8; | |
3941 } | |
3942 } | |
3943 | |
3944 return offset; | |
3945 } | |
3946 | |
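/* Editor's note (illustrative): in the pairwise loop above, when both
   registers of an even/odd pair are live and call-saved they are moved with
   a single double-word access (DImode for integer registers, DFmode for FP
   registers); when only the odd one needs saving, OFFSET is bumped by 4 so
   the single-word move still lands in its natural slot, and the final
   "(offset + 7) & -8" keeps OFFSET double-word aligned for the next pair.  */
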
3947 /* Emit code to save or restore call-saved registers. */ | |
3948 | |
3949 static void | |
3950 emit_save_or_restore_regs (int action) | |
3951 { | |
3952 HOST_WIDE_INT offset; | |
3953 rtx base; | |
3954 | |
3955 offset = frame_base_offset - apparent_fsize; | |
3956 | |
3957 if (offset < -4096 || offset + num_gfregs * 4 > 4095) | |
3958 { | |
3959 /* ??? This might be optimized a little as %g1 might already have a | |
3960 value close enough that a single add insn will do. */ | |
3961 /* ??? Although, all of this is probably only a temporary fix | |
3962 because if %g1 can hold a function result, then | |
3963 sparc_expand_epilogue will lose (the result will be | |
3964 clobbered). */ | |
3965 base = gen_rtx_REG (Pmode, 1); | |
3966 emit_move_insn (base, GEN_INT (offset)); | |
3967 emit_insn (gen_rtx_SET (VOIDmode, | |
3968 base, | |
3969 gen_rtx_PLUS (Pmode, frame_base_reg, base))); | |
3970 offset = 0; | |
3971 } | |
3972 else | |
3973 base = frame_base_reg; | |
3974 | |
3975 offset = save_or_restore_regs (0, 8, base, offset, action); | |
3976 save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action); | |
3977 } | |
3978 | |
3979 /* Generate a save_register_window insn. */ | |
3980 | |
3981 static rtx | |
3982 gen_save_register_window (rtx increment) | |
3983 { | |
3984 if (TARGET_ARCH64) | |
3985 return gen_save_register_windowdi (increment); | |
3986 else | |
3987 return gen_save_register_windowsi (increment); | |
3988 } | |
3989 | |
3990 /* Generate an increment for the stack pointer. */ | |
3991 | |
3992 static rtx | |
3993 gen_stack_pointer_inc (rtx increment) | |
3994 { | |
3995 return gen_rtx_SET (VOIDmode, | |
3996 stack_pointer_rtx, | |
3997 gen_rtx_PLUS (Pmode, | |
3998 stack_pointer_rtx, | |
3999 increment)); | |
4000 } | |
4001 | |
4002 /* Generate a decrement for the stack pointer. */ | |
4003 | |
4004 static rtx | |
4005 gen_stack_pointer_dec (rtx decrement) | |
4006 { | |
4007 return gen_rtx_SET (VOIDmode, | |
4008 stack_pointer_rtx, | |
4009 gen_rtx_MINUS (Pmode, | |
4010 stack_pointer_rtx, | |
4011 decrement)); | |
4012 } | |
4013 | |
4014 /* Expand the function prologue. The prologue is responsible for reserving | |
4015 storage for the frame, saving the call-saved registers and loading the | |
4016 PIC register if needed. */ | |
4017 | |
4018 void | |
4019 sparc_expand_prologue (void) | |
4020 { | |
4021 rtx insn; | |
4022 int i; | |
4023 | |
4024 /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying | |
4025 on the final value of the flag means deferring the prologue/epilogue | |
4026 expansion until just before the second scheduling pass, which is too | |
4027 late to emit multiple epilogues or return insns. | |
4028 | |
4029 Of course we are making the assumption that the value of the flag | |
4030 will not change between now and its final value. Of the three parts | |
4031 of the formula, only the last one can reasonably vary. Let's take a | |
4032 closer look, after assuming that the first two are set to true | |
4033 (otherwise the last value is effectively silenced). | |
4034 | |
4035 If only_leaf_regs_used returns false, the global predicate will also | |
4036 be false so the actual frame size calculated below will be positive. | |
4037 As a consequence, the save_register_window insn will be emitted in | |
4038 the instruction stream; now this insn explicitly references %fp | |
4039 which is not a leaf register so only_leaf_regs_used will always | |
4040 return false subsequently. | |
4041 | |
4042 If only_leaf_regs_used returns true, we hope that the subsequent | |
4043 optimization passes won't cause non-leaf registers to pop up. For | |
4044 example, the regrename pass has special provisions to not rename to | |
4045 non-leaf registers in a leaf function. */ | |
4046 sparc_leaf_function_p | |
4047 = optimize > 0 && leaf_function_p () && only_leaf_regs_used (); | |
4048 | |
4049 /* Need to use actual_fsize, since we are also allocating | |
4050 space for our callee (and our own register save area). */ | |
4051 actual_fsize | |
4052 = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); | |
4053 | |
4054 /* Advertise that the data calculated just above are now valid. */ | |
4055 sparc_prologue_data_valid_p = true; | |
4056 | |
4057 if (sparc_leaf_function_p) | |
4058 { | |
4059 frame_base_reg = stack_pointer_rtx; | |
4060 frame_base_offset = actual_fsize + SPARC_STACK_BIAS; | |
4061 } | |
4062 else | |
4063 { | |
4064 frame_base_reg = hard_frame_pointer_rtx; | |
4065 frame_base_offset = SPARC_STACK_BIAS; | |
4066 } | |
4067 | |
4068 if (actual_fsize == 0) | |
4069 /* do nothing. */ ; | |
4070 else if (sparc_leaf_function_p) | |
4071 { | |
4072 if (actual_fsize <= 4096) | |
4073 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize))); | |
4074 else if (actual_fsize <= 8192) | |
4075 { | |
4076 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); | |
4077 /* %sp is still the CFA register. */ | |
4078 RTX_FRAME_RELATED_P (insn) = 1; | |
4079 insn | |
4080 = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize))); | |
4081 } | |
4082 else | |
4083 { | |
4084 rtx reg = gen_rtx_REG (Pmode, 1); | |
4085 emit_move_insn (reg, GEN_INT (-actual_fsize)); | |
4086 insn = emit_insn (gen_stack_pointer_inc (reg)); | |
4087 REG_NOTES (insn) = | |
4088 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, | |
4089 gen_stack_pointer_inc (GEN_INT (-actual_fsize)), | |
4090 REG_NOTES (insn)); | |
4091 } | |
4092 | |
4093 RTX_FRAME_RELATED_P (insn) = 1; | |
4094 } | |
4095 else | |
4096 { | |
4097 if (actual_fsize <= 4096) | |
4098 insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize))); | |
4099 else if (actual_fsize <= 8192) | |
4100 { | |
4101 insn = emit_insn (gen_save_register_window (GEN_INT (-4096))); | |
4102 /* %sp is not the CFA register anymore. */ | |
4103 emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize))); | |
4104 } | |
4105 else | |
4106 { | |
4107 rtx reg = gen_rtx_REG (Pmode, 1); | |
4108 emit_move_insn (reg, GEN_INT (-actual_fsize)); | |
4109 insn = emit_insn (gen_save_register_window (reg)); | |
4110 } | |
4111 | |
4112 RTX_FRAME_RELATED_P (insn) = 1; | |
4113 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) | |
4114 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1; | |
4115 } | |
4116 | |
4117 if (num_gfregs) | |
4118 emit_save_or_restore_regs (SORR_SAVE); | |
4119 | |
4120 /* Load the PIC register if needed. */ | |
4121 if (flag_pic && crtl->uses_pic_offset_table) | |
4122 load_pic_register (false); | |
4123 } | |
4124 | |
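/* Editor's note (illustrative): the three cases above exist because the
   immediate field of the SPARC save/add instructions is 13 bits signed
   (-4096 .. 4095): a frame of up to 4096 bytes fits in one immediate, up to
   8192 bytes is split into -4096 plus the remainder, and anything larger is
   first loaded into the scratch register %g1.  sparc_expand_epilogue below
   mirrors the same structure when deallocating the frame.  */
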
4125 /* This function generates the assembly code for function entry, which boils | |
4126 down to emitting the necessary .register directives. */ | |
4127 | |
4128 static void | |
4129 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) | |
4130 { | |
4131 /* Check that the assumption we made in sparc_expand_prologue is valid. */ | |
4132 gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs); | |
4133 | |
4134 sparc_output_scratch_registers (file); | |
4135 } | |
4136 | |
4137 /* Expand the function epilogue, either normal or part of a sibcall. | |
4138 We emit all the instructions except the return or the call. */ | |
4139 | |
4140 void | |
4141 sparc_expand_epilogue (void) | |
4142 { | |
4143 if (num_gfregs) | |
4144 emit_save_or_restore_regs (SORR_RESTORE); | |
4145 | |
4146 if (actual_fsize == 0) | |
4147 /* do nothing. */ ; | |
4148 else if (sparc_leaf_function_p) | |
4149 { | |
4150 if (actual_fsize <= 4096) | |
4151 emit_insn (gen_stack_pointer_dec (GEN_INT (-actual_fsize))); | |
4152 else if (actual_fsize <= 8192) | |
4153 { | |
4154 emit_insn (gen_stack_pointer_dec (GEN_INT (-4096))); | |
4155 emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize))); | |
4156 } | |
4157 else | |
4158 { | |
4159 rtx reg = gen_rtx_REG (Pmode, 1); | |
4160 emit_move_insn (reg, GEN_INT (-actual_fsize)); | |
4161 emit_insn (gen_stack_pointer_dec (reg)); | |
4162 } | |
4163 } | |
4164 } | |
4165 | |
4166 /* Return true if it is appropriate to emit `return' instructions in the | |
4167 body of a function. */ | |
4168 | |
4169 bool | |
4170 sparc_can_use_return_insn_p (void) | |
4171 { | |
4172 return sparc_prologue_data_valid_p | |
4173 && (actual_fsize == 0 || !sparc_leaf_function_p); | |
4174 } | |
4175 | |
4176 /* This function generates the assembly code for function exit. */ | |
4177 | |
4178 static void | |
4179 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) | |
4180 { | |
4181 /* If code does not drop into the epilogue, we still have to output | |
4182 a dummy nop for the sake of sane backtraces. Otherwise, if the | |
4183 last two instructions of a function were "call foo; dslot;" this | |
4184 can make the return PC of foo (i.e. address of call instruction | |
4185 plus 8) point to the first instruction in the next function. */ | |
4186 | |
4187 rtx insn, last_real_insn; | |
4188 | |
4189 insn = get_last_insn (); | |
4190 | |
4191 last_real_insn = prev_real_insn (insn); | |
4192 if (last_real_insn | |
4193 && GET_CODE (last_real_insn) == INSN | |
4194 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) | |
4195 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); | |
4196 | |
4197 if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN) | |
4198 fputs("\tnop\n", file); | |
4199 | |
4200 sparc_output_deferred_case_vectors (); | |
4201 } | |
4202 | |
4203 /* Output a 'restore' instruction. */ | |
4204 | |
4205 static void | |
4206 output_restore (rtx pat) | |
4207 { | |
4208 rtx operands[3]; | |
4209 | |
4210 if (! pat) | |
4211 { | |
4212 fputs ("\t restore\n", asm_out_file); | |
4213 return; | |
4214 } | |
4215 | |
4216 gcc_assert (GET_CODE (pat) == SET); | |
4217 | |
4218 operands[0] = SET_DEST (pat); | |
4219 pat = SET_SRC (pat); | |
4220 | |
4221 switch (GET_CODE (pat)) | |
4222 { | |
4223 case PLUS: | |
4224 operands[1] = XEXP (pat, 0); | |
4225 operands[2] = XEXP (pat, 1); | |
4226 output_asm_insn (" restore %r1, %2, %Y0", operands); | |
4227 break; | |
4228 case LO_SUM: | |
4229 operands[1] = XEXP (pat, 0); | |
4230 operands[2] = XEXP (pat, 1); | |
4231 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); | |
4232 break; | |
4233 case ASHIFT: | |
4234 operands[1] = XEXP (pat, 0); | |
4235 gcc_assert (XEXP (pat, 1) == const1_rtx); | |
4236 output_asm_insn (" restore %r1, %r1, %Y0", operands); | |
4237 break; | |
4238 default: | |
4239 operands[1] = pat; | |
4240 output_asm_insn (" restore %%g0, %1, %Y0", operands); | |
4241 break; | |
4242 } | |
4243 } | |
4244 | |
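/* Editor's note (illustrative sketch): a delay-slot insn such as
   (set (reg %i0) (plus (reg %i1) (const_int 1))) would be folded by the
   PLUS case above into " restore %i1, 1, %o0", the %Y modifier printing the
   destination as the caller sees it after the window shift.  The
   authoritative behaviour of the %r and %Y output modifiers is in the
   operand-printing code, not reproduced here.  */
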
4245 /* Output a return. */ | |
4246 | |
4247 const char * | |
4248 output_return (rtx insn) | |
4249 { | |
4250 if (sparc_leaf_function_p) | |
4251 { | |
4252 /* This is a leaf function so we don't have to bother restoring the | |
4253 register window, which frees us from dealing with the convoluted | |
4254 semantics of restore/return. We simply output the jump to the | |
4255 return address and the insn in the delay slot (if any). */ | |
4256 | |
4257 gcc_assert (! crtl->calls_eh_return); | |
4258 | |
4259 return "jmp\t%%o7+%)%#"; | |
4260 } | |
4261 else | |
4262 { | |
4263 /* This is a regular function so we have to restore the register window. | |
4264 We may have a pending insn for the delay slot, which will be either | |
4265 combined with the 'restore' instruction or put in the delay slot of | |
4266 the 'return' instruction. */ | |
4267 | |
4268 if (crtl->calls_eh_return) | |
4269 { | |
4270 /* If the function uses __builtin_eh_return, the eh_return | |
4271 machinery occupies the delay slot. */ | |
4272 gcc_assert (! final_sequence); | |
4273 | |
4274 if (! flag_delayed_branch) | |
4275 fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file); | |
4276 | |
4277 if (TARGET_V9) | |
4278 fputs ("\treturn\t%i7+8\n", asm_out_file); | |
4279 else | |
4280 fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file); | |
4281 | |
4282 if (flag_delayed_branch) | |
4283 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); | |
4284 else | |
4285 fputs ("\t nop\n", asm_out_file); | |
4286 } | |
4287 else if (final_sequence) | |
4288 { | |
4289 rtx delay, pat; | |
4290 | |
4291 delay = NEXT_INSN (insn); | |
4292 gcc_assert (delay); | |
4293 | |
4294 pat = PATTERN (delay); | |
4295 | |
4296 if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) | |
4297 { | |
4298 epilogue_renumber (&pat, 0); | |
4299 return "return\t%%i7+%)%#"; | |
4300 } | |
4301 else | |
4302 { | |
4303 output_asm_insn ("jmp\t%%i7+%)", NULL); | |
4304 output_restore (pat); | |
4305 PATTERN (delay) = gen_blockage (); | |
4306 INSN_CODE (delay) = -1; | |
4307 } | |
4308 } | |
4309 else | |
4310 { | |
4311 /* The delay slot is empty. */ | |
4312 if (TARGET_V9) | |
4313 return "return\t%%i7+%)\n\t nop"; | |
4314 else if (flag_delayed_branch) | |
4315 return "jmp\t%%i7+%)\n\t restore"; | |
4316 else | |
4317 return "restore\n\tjmp\t%%o7+%)\n\t nop"; | |
4318 } | |
4319 } | |
4320 | |
4321 return ""; | |
4322 } | |
4323 | |
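/* Editor's note (illustrative): for a V9 non-leaf function whose delay slot
   could not be filled, the template above typically expands to

       return  %i7+8
        nop

   while a leaf function just emits "jmp %o7+8" with whatever the scheduler
   placed in its delay slot.  The %) modifier prints the proper return-address
   displacement and %# handles delay-slot annotation; their exact behaviour is
   defined in the operand-printing code, not here.  */
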
4324 /* Output a sibling call. */ | |
4325 | |
4326 const char * | |
4327 output_sibcall (rtx insn, rtx call_operand) | |
4328 { | |
4329 rtx operands[1]; | |
4330 | |
4331 gcc_assert (flag_delayed_branch); | |
4332 | |
4333 operands[0] = call_operand; | |
4334 | |
4335 if (sparc_leaf_function_p) | |
4336 { | |
4337 /* This is a leaf function so we don't have to bother restoring the | |
4338 register window. We simply output the jump to the function and | |
4339 the insn in the delay slot (if any). */ | |
4340 | |
4341 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); | |
4342 | |
4343 if (final_sequence) | |
4344 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", | |
4345 operands); | |
4346 else | |
4347 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize | |
4348 it into a branch if possible. */ | |
4349 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", | |
4350 operands); | |
4351 } | |
4352 else | |
4353 { | |
4354 /* This is a regular function so we have to restore the register window. | |
4355 We may have a pending insn for the delay slot, which will be combined | |
4356 with the 'restore' instruction. */ | |
4357 | |
4358 output_asm_insn ("call\t%a0, 0", operands); | |
4359 | |
4360 if (final_sequence) | |
4361 { | |
4362 rtx delay = NEXT_INSN (insn); | |
4363 gcc_assert (delay); | |
4364 | |
4365 output_restore (PATTERN (delay)); | |
4366 | |
4367 PATTERN (delay) = gen_blockage (); | |
4368 INSN_CODE (delay) = -1; | |
4369 } | |
4370 else | |
4371 output_restore (NULL_RTX); | |
4372 } | |
4373 | |
4374 return ""; | |
4375 } | |
4376 | |
4377 /* Functions for handling argument passing. | |
4378 | |
4379 For 32-bit, the first 6 args are normally in registers and the rest are | |
4380 pushed. Any arg that starts within the first 6 words is at least | |
4381 partially passed in a register unless its data type forbids it. | |
4382 | |
4383 For 64-bit, the argument registers are laid out as an array of 16 elements | |
4384 and arguments are added sequentially. The first 6 int args and up to the | |
4385 first 16 fp args (depending on size) are passed in regs. | |
4386 | |
4387 Slot Stack Integral Float Float in structure Double Long Double | |
4388 ---- ----- -------- ----- ------------------ ------ ----------- | |
4389 15 [SP+248] %f31 %f30,%f31 %d30 | |
4390 14 [SP+240] %f29 %f28,%f29 %d28 %q28 | |
4391 13 [SP+232] %f27 %f26,%f27 %d26 | |
4392 12 [SP+224] %f25 %f24,%f25 %d24 %q24 | |
4393 11 [SP+216] %f23 %f22,%f23 %d22 | |
4394 10 [SP+208] %f21 %f20,%f21 %d20 %q20 | |
4395 9 [SP+200] %f19 %f18,%f19 %d18 | |
4396 8 [SP+192] %f17 %f16,%f17 %d16 %q16 | |
4397 7 [SP+184] %f15 %f14,%f15 %d14 | |
4398 6 [SP+176] %f13 %f12,%f13 %d12 %q12 | |
4399 5 [SP+168] %o5 %f11 %f10,%f11 %d10 | |
4400 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 | |
4401 3 [SP+152] %o3 %f7 %f6,%f7 %d6 | |
4402 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 | |
4403 1 [SP+136] %o1 %f3 %f2,%f3 %d2 | |
4404 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 | |
4405 | |
4406 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. | |
4407 | |
4408 Integral arguments are always passed as 64-bit quantities appropriately | |
4409 extended. | |
4410 | |
4411 Passing of floating point values is handled as follows. | |
4412 If a prototype is in scope: | |
4413 If the value is in a named argument (i.e. not a stdarg function or a | |
4414 value not part of the `...') then the value is passed in the appropriate | |
4415 fp reg. | |
4416 If the value is part of the `...' and is passed in one of the first 6 | |
4417 slots then the value is passed in the appropriate int reg. | |
4418 If the value is part of the `...' and is not passed in one of the first 6 | |
4419 slots then the value is passed in memory. | |
4420 If a prototype is not in scope: | |
4421 If the value is one of the first 6 arguments the value is passed in the | |
4422 appropriate integer reg and the appropriate fp reg. | |
4423 If the value is not one of the first 6 arguments the value is passed in | |
4424 the appropriate fp reg and in memory. | |
4425 | |
4426 | |
4427 Summary of the calling conventions implemented by GCC on SPARC: | |
4428 | |
4429 32-bit ABI: | |
4430 size argument return value | |
4431 | |
4432 small integer <4 int. reg. int. reg. | |
4433 word 4 int. reg. int. reg. | |
4434 double word 8 int. reg. int. reg. | |
4435 | |
4436 _Complex small integer <8 int. reg. int. reg. | |
4437 _Complex word 8 int. reg. int. reg. | |
4438 _Complex double word 16 memory int. reg. | |
4439 | |
4440 vector integer <=8 int. reg. FP reg. | |
4441 vector integer >8 memory memory | |
4442 | |
4443 float 4 int. reg. FP reg. | |
4444 double 8 int. reg. FP reg. | |
4445 long double 16 memory memory | |
4446 | |
4447 _Complex float 8 memory FP reg. | |
4448 _Complex double 16 memory FP reg. | |
4449 _Complex long double 32 memory FP reg. | |
4450 | |
4451 vector float any memory memory | |
4452 | |
4453 aggregate any memory memory | |
4454 | |
4455 | |
4456 | |
4457 64-bit ABI: | |
4458 size argument return value | |
4459 | |
4460 small integer <8 int. reg. int. reg. | |
4461 word 8 int. reg. int. reg. | |
4462 double word 16 int. reg. int. reg. | |
4463 | |
4464 _Complex small integer <16 int. reg. int. reg. | |
4465 _Complex word 16 int. reg. int. reg. | |
4466 _Complex double word 32 memory int. reg. | |
4467 | |
4468 vector integer <=16 FP reg. FP reg. | |
4469 vector integer 16<s<=32 memory FP reg. | |
4470 vector integer >32 memory memory | |
4471 | |
4472 float 4 FP reg. FP reg. | |
4473 double 8 FP reg. FP reg. | |
4474 long double 16 FP reg. FP reg. | |
4475 | |
4476 _Complex float 8 FP reg. FP reg. | |
4477 _Complex double 16 FP reg. FP reg. | |
4478 _Complex long double 32 memory FP reg. | |
4479 | |
4480 vector float <=16 FP reg. FP reg. | |
4481 vector float 16<s<=32 memory FP reg. | |
4482 vector float >32 memory memory | |
4483 | |
4484 aggregate <=16 reg. reg. | |
4485 aggregate 16<s<=32 memory reg. | |
4486 aggregate >32 memory memory | |
4487 | |
4488 | |
4489 | |
4490 Note #1: complex floating-point types follow the extended SPARC ABIs as | |
4491 implemented by the Sun compiler. | |
4492 | |
4493 Note #2: integral vector types follow the scalar floating-point types | |
4494 conventions to match what is implemented by the Sun VIS SDK. | |
4495 | |
4496 Note #3: floating-point vector types follow the aggregate types | |
4497 conventions. */ | |
4498 | |
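/* Editor's note (worked example, illustrative only): under the 64-bit ABI
   summarized above, a prototyped call such as

       void f (int a, double b, float c);

   passes a in slot 0 (%o0), b in slot 1 (%d2) and c in slot 2; because c
   fills only half of the slot's FP register pair it is right-justified and
   lands in %f5, exactly as the table shows.  The code implementing this
   assignment is function_arg_slotno and its callers further down.  */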
4499 | |
4500 /* Maximum number of int regs for args. */ | |
4501 #define SPARC_INT_ARG_MAX 6 | |
4502 /* Maximum number of fp regs for args. */ | |
4503 #define SPARC_FP_ARG_MAX 16 | |
4504 | |
4505 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) | |
4506 | |
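/* Editor's note: ROUND_ADVANCE simply counts argument words, rounding up;
   e.g. with UNITS_PER_WORD == 4, ROUND_ADVANCE (6) == 2.  */
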
4507 /* Handle the INIT_CUMULATIVE_ARGS macro. | |
4508 Initialize a variable CUM of type CUMULATIVE_ARGS | |
4509 for a call to a function whose data type is FNTYPE. | |
4510 For a library call, FNTYPE is 0. */ | |
4511 | |
4512 void | |
4513 init_cumulative_args (struct sparc_args *cum, tree fntype, | |
4514 rtx libname ATTRIBUTE_UNUSED, | |
4515 tree fndecl ATTRIBUTE_UNUSED) | |
4516 { | |
4517 cum->words = 0; | |
4518 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype); | |
4519 cum->libcall_p = fntype == 0; | |
4520 } | |
4521 | |
4522 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook. | |
4523 When a prototype says `char' or `short', really pass an `int'. */ | |
4524 | |
4525 static bool | |
4526 sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED) | |
4527 { | |
4528 return TARGET_ARCH32 ? true : false; | |
4529 } | |
4530 | |
4531 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ | |
4532 | |
4533 static bool | |
4534 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED) | |
4535 { | |
4536 return TARGET_ARCH64 ? true : false; | |
4537 } | |
4538 | |
4539 /* Scan the record type TYPE and return the following predicates: | |
4540 - INTREGS_P: the record contains at least one field or sub-field | |
4541 that is eligible for promotion in integer registers. | |
4542 - FP_REGS_P: the record contains at least one field or sub-field | |
4543 that is eligible for promotion in floating-point registers. | |
4544 - PACKED_P: the record contains at least one field that is packed. | |
4545 | |
4546 Sub-fields are not taken into account for the PACKED_P predicate. */ | |
4547 | |
4548 static void | |
4549 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p) | |
4550 { | |
4551 tree field; | |
4552 | |
4553 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
4554 { | |
4555 if (TREE_CODE (field) == FIELD_DECL) | |
4556 { | |
4557 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) | |
4558 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0); | |
4559 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) | |
4560 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) | |
4561 && TARGET_FPU) | |
4562 *fpregs_p = 1; | |
4563 else | |
4564 *intregs_p = 1; | |
4565 | |
4566 if (packed_p && DECL_PACKED (field)) | |
4567 *packed_p = 1; | |
4568 } | |
4569 } | |
4570 } | |
4571 | |
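/* Editor's note (illustrative): for struct { float f; int i; } the scan
   above sets both *fpregs_p (for f, when TARGET_FPU) and *intregs_p (for
   i); if any directly contained field is packed, *packed_p is set as well,
   which the callers use to force the whole record into integer registers
   or memory.  */
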
4572 /* Compute the slot number to pass an argument in. | |
4573 Return the slot number or -1 if passing on the stack. | |
4574 | |
4575 CUM is a variable of type CUMULATIVE_ARGS which gives info about | |
4576 the preceding args and about the function being called. | |
4577 MODE is the argument's machine mode. | |
4578 TYPE is the data type of the argument (as a tree). | |
4579 This is null for libcalls where that information may | |
4580 not be available. | |
4581 NAMED is nonzero if this argument is a named parameter | |
4582 (otherwise it is an extra parameter matching an ellipsis). | |
4583 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. | |
4584 *PREGNO records the register number to use if scalar type. | |
4585 *PPADDING records the amount of padding needed in words. */ | |
4586 | |
4587 static int | |
4588 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode, | |
4589 tree type, int named, int incoming_p, | |
4590 int *pregno, int *ppadding) | |
4591 { | |
4592 int regbase = (incoming_p | |
4593 ? SPARC_INCOMING_INT_ARG_FIRST | |
4594 : SPARC_OUTGOING_INT_ARG_FIRST); | |
4595 int slotno = cum->words; | |
4596 enum mode_class mclass; | |
4597 int regno; | |
4598 | |
4599 *ppadding = 0; | |
4600 | |
4601 if (type && TREE_ADDRESSABLE (type)) | |
4602 return -1; | |
4603 | |
4604 if (TARGET_ARCH32 | |
4605 && mode == BLKmode | |
4606 && type | |
4607 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0) | |
4608 return -1; | |
4609 | |
4610 /* For SPARC64, objects requiring 16-byte alignment get it. */ | |
4611 if (TARGET_ARCH64 | |
4612 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 | |
4613 && (slotno & 1) != 0) | |
4614 slotno++, *ppadding = 1; | |
4615 | |
4616 mclass = GET_MODE_CLASS (mode); | |
4617 if (type && TREE_CODE (type) == VECTOR_TYPE) | |
4618 { | |
4619 /* Vector types deserve special treatment because they are | |
4620 polymorphic wrt their mode, depending upon whether VIS | |
4621 instructions are enabled. */ | |
4622 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) | |
4623 { | |
4624 /* The SPARC port defines no floating-point vector modes. */ | |
4625 gcc_assert (mode == BLKmode); | |
4626 } | |
4627 else | |
4628 { | |
4629 /* Integral vector types should either have a vector | |
4630 mode or an integral mode, because we are guaranteed | |
4631 by pass_by_reference that their size is not greater | |
4632 than 16 bytes and TImode is 16-byte wide. */ | |
4633 gcc_assert (mode != BLKmode); | |
4634 | |
4635 /* Vector integers are handled like floats according to | |
4636 the Sun VIS SDK. */ | |
4637 mclass = MODE_FLOAT; | |
4638 } | |
4639 } | |
4640 | |
4641 switch (mclass) | |
4642 { | |
4643 case MODE_FLOAT: | |
4644 case MODE_COMPLEX_FLOAT: | |
4645 case MODE_VECTOR_INT: | |
4646 if (TARGET_ARCH64 && TARGET_FPU && named) | |
4647 { | |
4648 if (slotno >= SPARC_FP_ARG_MAX) | |
4649 return -1; | |
4650 regno = SPARC_FP_ARG_FIRST + slotno * 2; | |
4651 /* Arguments filling only one single FP register are | |
4652 right-justified in the outer double FP register. */ | |
4653 if (GET_MODE_SIZE (mode) <= 4) | |
4654 regno++; | |
4655 break; | |
4656 } | |
4657 /* fallthrough */ | |
4658 | |
4659 case MODE_INT: | |
4660 case MODE_COMPLEX_INT: | |
4661 if (slotno >= SPARC_INT_ARG_MAX) | |
4662 return -1; | |
4663 regno = regbase + slotno; | |
4664 break; | |
4665 | |
4666 case MODE_RANDOM: | |
4667 if (mode == VOIDmode) | |
4668 /* MODE is VOIDmode when generating the actual call. */ | |
4669 return -1; | |
4670 | |
4671 gcc_assert (mode == BLKmode); | |
4672 | |
4673 if (TARGET_ARCH32 | |
4674 || !type | |
4675 || (TREE_CODE (type) != VECTOR_TYPE | |
4676 && TREE_CODE (type) != RECORD_TYPE)) | |
4677 { | |
4678 if (slotno >= SPARC_INT_ARG_MAX) | |
4679 return -1; | |
4680 regno = regbase + slotno; | |
4681 } | |
4682 else /* TARGET_ARCH64 && type */ | |
4683 { | |
4684 int intregs_p = 0, fpregs_p = 0, packed_p = 0; | |
4685 | |
4686 /* First see what kinds of registers we would need. */ | |
4687 if (TREE_CODE (type) == VECTOR_TYPE) | |
4688 fpregs_p = 1; | |
4689 else | |
4690 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p); | |
4691 | |
4692 /* The ABI obviously doesn't specify how packed structures | |
4693 are passed. These are defined to be passed in int regs | |
4694 if possible, otherwise memory. */ | |
4695 if (packed_p || !named) | |
4696 fpregs_p = 0, intregs_p = 1; | |
4697 | |
4698 /* If all arg slots are filled, then must pass on stack. */ | |
4699 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX) | |
4700 return -1; | |
4701 | |
4702 /* If there are only int args and all int arg slots are filled, | |
4703 then must pass on stack. */ | |
4704 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX) | |
4705 return -1; | |
4706 | |
4707 /* Note that even if all int arg slots are filled, fp members may | |
4708 still be passed in regs if such regs are available. | |
4709 *PREGNO isn't set because there may be more than one; it's up | |
4710 to the caller to compute them. */ | |
4711 return slotno; | |
4712 } | |
4713 break; | |
4714 | |
4715 default : | |
4716 gcc_unreachable (); | |
4717 } | |
4718 | |
4719 *pregno = regno; | |
4720 return slotno; | |
4721 } | |
4722 | |
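/* Editor's note (illustrative): the 16-byte alignment rule above means that
   a long double (TFmode) argument arriving when CUM->words is odd is pushed
   to the next even slot and *PPADDING is set to 1 so the caller accounts
   for the skipped word; e.g. for f (int, long double) the int takes slot 0
   and the long double slots 2-3, leaving slot 1 unused.  */
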
4723 /* Handle recursive register counting for structure field layout. */ | |
4724 | |
4725 struct function_arg_record_value_parms | |
4726 { | |
4727 rtx ret; /* return expression being built. */ | |
4728 int slotno; /* slot number of the argument. */ | |
4729 int named; /* whether the argument is named. */ | |
4730 int regbase; /* regno of the base register. */ | |
4731 int stack; /* 1 if part of the argument is on the stack. */ | |
4732 int intoffset; /* offset of the first pending integer field. */ | |
4733 unsigned int nregs; /* number of words passed in registers. */ | |
4734 }; | |
4735 | |
4736 static void function_arg_record_value_3 | |
4737 (HOST_WIDE_INT, struct function_arg_record_value_parms *); | |
4738 static void function_arg_record_value_2 | |
4739 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); | |
4740 static void function_arg_record_value_1 | |
4741 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); | |
4742 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int); | |
4743 static rtx function_arg_union_value (int, enum machine_mode, int, int); | |
4744 | |
4745 /* A subroutine of function_arg_record_value. Traverse the structure | |
4746 recursively and determine how many registers will be required. */ | |
4747 | |
4748 static void | |
4749 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos, | |
4750 struct function_arg_record_value_parms *parms, | |
4751 bool packed_p) | |
4752 { | |
4753 tree field; | |
4754 | |
4755 /* We need to compute how many registers are needed so we can | |
4756 allocate the PARALLEL but before we can do that we need to know | |
4757 whether there are any packed fields. The ABI obviously doesn't | |
4758 specify how structures are passed in this case, so they are | |
4759 defined to be passed in int regs if possible, otherwise memory, | |
4760 regardless of whether there are fp values present. */ | |
4761 | |
4762 if (! packed_p) | |
4763 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
4764 { | |
4765 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) | |
4766 { | |
4767 packed_p = true; | |
4768 break; | |
4769 } | |
4770 } | |
4771 | |
4772 /* Compute how many registers we need. */ | |
4773 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
4774 { | |
4775 if (TREE_CODE (field) == FIELD_DECL) | |
4776 { | |
4777 HOST_WIDE_INT bitpos = startbitpos; | |
4778 | |
4779 if (DECL_SIZE (field) != 0) | |
4780 { | |
4781 if (integer_zerop (DECL_SIZE (field))) | |
4782 continue; | |
4783 | |
4784 if (host_integerp (bit_position (field), 1)) | |
4785 bitpos += int_bit_position (field); | |
4786 } | |
4787 | |
4788 /* ??? FIXME: else assume zero offset. */ | |
4789 | |
4790 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) | |
4791 function_arg_record_value_1 (TREE_TYPE (field), | |
4792 bitpos, | |
4793 parms, | |
4794 packed_p); | |
4795 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) | |
4796 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) | |
4797 && TARGET_FPU | |
4798 && parms->named | |
4799 && ! packed_p) | |
4800 { | |
4801 if (parms->intoffset != -1) | |
4802 { | |
4803 unsigned int startbit, endbit; | |
4804 int intslots, this_slotno; | |
4805 | |
4806 startbit = parms->intoffset & -BITS_PER_WORD; | |
4807 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; | |
4808 | |
4809 intslots = (endbit - startbit) / BITS_PER_WORD; | |
4810 this_slotno = parms->slotno + parms->intoffset | |
4811 / BITS_PER_WORD; | |
4812 | |
4813 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) | |
4814 { | |
4815 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); | |
4816 /* We need to pass this field on the stack. */ | |
4817 parms->stack = 1; | |
4818 } | |
4819 | |
4820 parms->nregs += intslots; | |
4821 parms->intoffset = -1; | |
4822 } | |
4823 | |
4824 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX. | |
4825 If it wasn't true we wouldn't be here. */ | |
4826 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE | |
4827 && DECL_MODE (field) == BLKmode) | |
4828 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); | |
4829 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) | |
4830 parms->nregs += 2; | |
4831 else | |
4832 parms->nregs += 1; | |
4833 } | |
4834 else | |
4835 { | |
4836 if (parms->intoffset == -1) | |
4837 parms->intoffset = bitpos; | |
4838 } | |
4839 } | |
4840 } | |
4841 } | |
4842 | |
4843 /* A subroutine of function_arg_record_value. Assign the bits of the | |
4844 structure between parms->intoffset and bitpos to integer registers. */ | |
4845 | |
4846 static void | |
4847 function_arg_record_value_3 (HOST_WIDE_INT bitpos, | |
4848 struct function_arg_record_value_parms *parms) | |
4849 { | |
4850 enum machine_mode mode; | |
4851 unsigned int regno; | |
4852 unsigned int startbit, endbit; | |
4853 int this_slotno, intslots, intoffset; | |
4854 rtx reg; | |
4855 | |
4856 if (parms->intoffset == -1) | |
4857 return; | |
4858 | |
4859 intoffset = parms->intoffset; | |
4860 parms->intoffset = -1; | |
4861 | |
4862 startbit = intoffset & -BITS_PER_WORD; | |
4863 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; | |
4864 intslots = (endbit - startbit) / BITS_PER_WORD; | |
4865 this_slotno = parms->slotno + intoffset / BITS_PER_WORD; | |
4866 | |
4867 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno); | |
4868 if (intslots <= 0) | |
4869 return; | |
4870 | |
4871 /* If this is the trailing part of a word, only load that much into | |
4872 the register. Otherwise load the whole register. Note that in | |
4873 the latter case we may pick up unwanted bits. It's not a problem | |
4874 at the moment, but we may wish to revisit it. */ | |
4875 | |
4876 if (intoffset % BITS_PER_WORD != 0) | |
4877 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, | |
4878 MODE_INT); | |
4879 else | |
4880 mode = word_mode; | |
4881 | |
4882 intoffset /= BITS_PER_UNIT; | |
4883 do | |
4884 { | |
4885 regno = parms->regbase + this_slotno; | |
4886 reg = gen_rtx_REG (mode, regno); | |
4887 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) | |
4888 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); | |
4889 | |
4890 this_slotno += 1; | |
4891 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; | |
4892 mode = word_mode; | |
4893 parms->nregs += 1; | |
4894 intslots -= 1; | |
4895 } | |
4896 while (intslots > 0); | |
4897 } | |
4898 | |
4899 /* A subroutine of function_arg_record_value. Traverse the structure | |
4900 recursively and assign bits to floating point registers. Track which | |
4901 bits in between need integer registers; invoke function_arg_record_value_3 | |
4902 to make that happen. */ | |
4903 | |
4904 static void | |
4905 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos, | |
4906 struct function_arg_record_value_parms *parms, | |
4907 bool packed_p) | |
4908 { | |
4909 tree field; | |
4910 | |
4911 if (! packed_p) | |
4912 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
4913 { | |
4914 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) | |
4915 { | |
4916 packed_p = true; | |
4917 break; | |
4918 } | |
4919 } | |
4920 | |
4921 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
4922 { | |
4923 if (TREE_CODE (field) == FIELD_DECL) | |
4924 { | |
4925 HOST_WIDE_INT bitpos = startbitpos; | |
4926 | |
4927 if (DECL_SIZE (field) != 0) | |
4928 { | |
4929 if (integer_zerop (DECL_SIZE (field))) | |
4930 continue; | |
4931 | |
4932 if (host_integerp (bit_position (field), 1)) | |
4933 bitpos += int_bit_position (field); | |
4934 } | |
4935 | |
4936 /* ??? FIXME: else assume zero offset. */ | |
4937 | |
4938 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) | |
4939 function_arg_record_value_2 (TREE_TYPE (field), | |
4940 bitpos, | |
4941 parms, | |
4942 packed_p); | |
4943 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) | |
4944 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) | |
4945 && TARGET_FPU | |
4946 && parms->named | |
4947 && ! packed_p) | |
4948 { | |
4949 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD; | |
4950 int regno, nregs, pos; | |
4951 enum machine_mode mode = DECL_MODE (field); | |
4952 rtx reg; | |
4953 | |
4954 function_arg_record_value_3 (bitpos, parms); | |
4955 | |
4956 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE | |
4957 && mode == BLKmode) | |
4958 { | |
4959 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); | |
4960 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); | |
4961 } | |
4962 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) | |
4963 { | |
4964 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); | |
4965 nregs = 2; | |
4966 } | |
4967 else | |
4968 nregs = 1; | |
4969 | |
4970 regno = SPARC_FP_ARG_FIRST + this_slotno * 2; | |
4971 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) | |
4972 regno++; | |
4973 reg = gen_rtx_REG (mode, regno); | |
4974 pos = bitpos / BITS_PER_UNIT; | |
4975 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) | |
4976 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); | |
4977 parms->nregs += 1; | |
4978 while (--nregs > 0) | |
4979 { | |
4980 regno += GET_MODE_SIZE (mode) / 4; | |
4981 reg = gen_rtx_REG (mode, regno); | |
4982 pos += GET_MODE_SIZE (mode); | |
4983 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) | |
4984 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); | |
4985 parms->nregs += 1; | |
4986 } | |
4987 } | |
4988 else | |
4989 { | |
4990 if (parms->intoffset == -1) | |
4991 parms->intoffset = bitpos; | |
4992 } | |
4993 } | |
4994 } | |
4995 } | |
4996 | |
4997 /* Used by function_arg and function_value to implement the complex | |
4998 conventions of the 64-bit ABI for passing and returning structures. | |
4999 Return an expression valid as a return value for the two macros | |
5000 FUNCTION_ARG and FUNCTION_VALUE. | |
5001 | |
5002 TYPE is the data type of the argument (as a tree). | |
5003 This is null for libcalls where that information may | |
5004 not be available. | |
5005 MODE is the argument's machine mode. | |
5006 SLOTNO is the index number of the argument's slot in the parameter array. | |
5007 NAMED is nonzero if this argument is a named parameter | |
5008 (otherwise it is an extra parameter matching an ellipsis). | |
5009 REGBASE is the regno of the base register for the parameter array. */ | |
5010 | |
5011 static rtx | |
5012 function_arg_record_value (const_tree type, enum machine_mode mode, | |
5013 int slotno, int named, int regbase) | |
5014 { | |
5015 HOST_WIDE_INT typesize = int_size_in_bytes (type); | |
5016 struct function_arg_record_value_parms parms; | |
5017 unsigned int nregs; | |
5018 | |
5019 parms.ret = NULL_RTX; | |
5020 parms.slotno = slotno; | |
5021 parms.named = named; | |
5022 parms.regbase = regbase; | |
5023 parms.stack = 0; | |
5024 | |
5025 /* Compute how many registers we need. */ | |
5026 parms.nregs = 0; | |
5027 parms.intoffset = 0; | |
5028 function_arg_record_value_1 (type, 0, &parms, false); | |
5029 | |
5030 /* Take into account pending integer fields. */ | |
5031 if (parms.intoffset != -1) | |
5032 { | |
5033 unsigned int startbit, endbit; | |
5034 int intslots, this_slotno; | |
5035 | |
5036 startbit = parms.intoffset & -BITS_PER_WORD; | |
5037 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD; | |
5038 intslots = (endbit - startbit) / BITS_PER_WORD; | |
5039 this_slotno = slotno + parms.intoffset / BITS_PER_WORD; | |
5040 | |
5041 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) | |
5042 { | |
5043 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); | |
5044 /* We need to pass this field on the stack. */ | |
5045 parms.stack = 1; | |
5046 } | |
5047 | |
5048 parms.nregs += intslots; | |
5049 } | |
5050 nregs = parms.nregs; | |
5051 | |
5052 /* Allocate the vector and handle some annoying special cases. */ | |
5053 if (nregs == 0) | |
5054 { | |
5055 /* ??? Empty structure has no value? Duh? */ | |
5056 if (typesize <= 0) | |
5057 { | |
5058 /* Though there's nothing really to store, return a word register | |
5059 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL | |
5060 leads to breakage due to the fact that there are zero bytes to | |
5061 load. */ | |
5062 return gen_rtx_REG (mode, regbase); | |
5063 } | |
5064 else | |
5065 { | |
5066 /* ??? C++ has structures with no fields, and yet a size. Give up | |
5067 for now and pass everything back in integer registers. */ | |
5068 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
5069 } | |
5070 if (nregs + slotno > SPARC_INT_ARG_MAX) | |
5071 nregs = SPARC_INT_ARG_MAX - slotno; | |
5072 } | |
5073 gcc_assert (nregs != 0); | |
5074 | |
5075 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs)); | |
5076 | |
5077 /* If at least one field must be passed on the stack, generate | |
5078 (parallel [(expr_list (nil) ...) ...]) so that all fields will | |
5079 also be passed on the stack. We can't do much better because the | |
5080 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case | |
5081 of structures for which the fields passed exclusively in registers | |
5082 are not at the beginning of the structure. */ | |
5083 if (parms.stack) | |
5084 XVECEXP (parms.ret, 0, 0) | |
5085 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
5086 | |
5087 /* Fill in the entries. */ | |
5088 parms.nregs = 0; | |
5089 parms.intoffset = 0; | |
5090 function_arg_record_value_2 (type, 0, &parms, false); | |
5091 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms); | |
5092 | |
5093 gcc_assert (parms.nregs == nregs); | |
5094 | |
5095 return parms.ret; | |
5096 } | |
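
/* A worked example (a sketch, not part of the original source): with the
   FPU enabled, a named 16-byte argument such as

       struct { double d; int i; };

   passed in slot 0 on 64-bit SPARC comes out of the routine above as a
   PARALLEL pairing each piece with its byte offset, roughly

       (parallel [(expr_list (reg:DF %f0) (const_int 0))
                  (expr_list (reg:DI %o1) (const_int 8))])

   assuming SPARC_FP_ARG_FIRST is %f0 and the outgoing integer argument
   registers start at %o0: the double goes to a float register and the
   trailing integer word to the second integer slot's register.  */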
5097 | |
5098 /* Used by function_arg and function_value to implement the conventions | |
5099 of the 64-bit ABI for passing and returning unions. | |
5100 Return an expression valid as a return value for the two macros | |
5101 FUNCTION_ARG and FUNCTION_VALUE. | |
5102 | |
5103 SIZE is the size in bytes of the union. | |
5104 MODE is the argument's machine mode. | |
5105 SLOTNO is the argument's slot index; REGNO is the hard register the union will be passed in. */ | |
5106 | |
5107 static rtx | |
5108 function_arg_union_value (int size, enum machine_mode mode, int slotno, | |
5109 int regno) | |
5110 { | |
5111 int nwords = ROUND_ADVANCE (size), i; | |
5112 rtx regs; | |
5113 | |
5114 /* See comment in previous function for empty structures. */ | |
5115 if (nwords == 0) | |
5116 return gen_rtx_REG (mode, regno); | |
5117 | |
5118 if (slotno == SPARC_INT_ARG_MAX - 1) | |
5119 nwords = 1; | |
5120 | |
5121 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); | |
5122 | |
5123 for (i = 0; i < nwords; i++) | |
5124 { | |
5125 /* Unions are passed left-justified. */ | |
5126 XVECEXP (regs, 0, i) | |
5127 = gen_rtx_EXPR_LIST (VOIDmode, | |
5128 gen_rtx_REG (word_mode, regno), | |
5129 GEN_INT (UNITS_PER_WORD * i)); | |
5130 regno++; | |
5131 } | |
5132 | |
5133 return regs; | |
5134 } | |
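
/* A worked example (a sketch): a 16-byte union passed starting in slot 0
   comes back as two word registers holding the data left-justified,

       (parallel [(expr_list (reg:DI %o0) (const_int 0))
                  (expr_list (reg:DI %o1) (const_int 8))])

   assuming REGNO designates %o0.  As coded above, a union landing in the
   very last slot (SPARC_INT_ARG_MAX - 1) is clipped to a single word.  */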
5135 | |
5136 /* Used by function_arg and function_value to implement the conventions | |
5137 for passing and returning large (BLKmode) vectors. | |
5138 Return an expression valid as a return value for the two macros | |
5139 FUNCTION_ARG and FUNCTION_VALUE. | |
5140 | |
5141 SIZE is the size in bytes of the vector (at least 8 bytes). | |
5142 REGNO is the FP hard register the vector will be passed in. */ | |
5143 | |
5144 static rtx | |
5145 function_arg_vector_value (int size, int regno) | |
5146 { | |
5147 int i, nregs = size / 8; | |
5148 rtx regs; | |
5149 | |
5150 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); | |
5151 | |
5152 for (i = 0; i < nregs; i++) | |
5153 { | |
5154 XVECEXP (regs, 0, i) | |
5155 = gen_rtx_EXPR_LIST (VOIDmode, | |
5156 gen_rtx_REG (DImode, regno + 2*i), | |
5157 GEN_INT (i*8)); | |
5158 } | |
5159 | |
5160 return regs; | |
5161 } | |
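
/* A worked example (a sketch): a 16-byte vector starting at FP register
   REGNO is split into 8-byte DImode chunks, one per even/odd FP register
   pair:

       (parallel [(expr_list (reg:DI <REGNO>)     (const_int 0))
                  (expr_list (reg:DI <REGNO + 2>) (const_int 8))])  */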
5162 | |
5163 /* Handle the FUNCTION_ARG macro. | |
5164 Determine where to put an argument to a function. | |
5165 Value is zero to push the argument on the stack, | |
5166 or a hard register in which to store the argument. | |
5167 | |
5168 CUM is a variable of type CUMULATIVE_ARGS which gives info about | |
5169 the preceding args and about the function being called. | |
5170 MODE is the argument's machine mode. | |
5171 TYPE is the data type of the argument (as a tree). | |
5172 This is null for libcalls where that information may | |
5173 not be available. | |
5174 NAMED is nonzero if this argument is a named parameter | |
5175 (otherwise it is an extra parameter matching an ellipsis). | |
5176 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. */ | |
5177 | |
5178 rtx | |
5179 function_arg (const struct sparc_args *cum, enum machine_mode mode, | |
5180 tree type, int named, int incoming_p) | |
5181 { | |
5182 int regbase = (incoming_p | |
5183 ? SPARC_INCOMING_INT_ARG_FIRST | |
5184 : SPARC_OUTGOING_INT_ARG_FIRST); | |
5185 int slotno, regno, padding; | |
5186 enum mode_class mclass = GET_MODE_CLASS (mode); | |
5187 | |
5188 slotno = function_arg_slotno (cum, mode, type, named, incoming_p, | |
5189 &regno, &padding); | |
5190 if (slotno == -1) | |
5191 return 0; | |
5192 | |
5193 /* Vector types deserve special treatment because they are polymorphic wrt | |
5194 their mode, depending upon whether VIS instructions are enabled. */ | |
5195 if (type && TREE_CODE (type) == VECTOR_TYPE) | |
5196 { | |
5197 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5198 gcc_assert ((TARGET_ARCH32 && size <= 8) | |
5199 || (TARGET_ARCH64 && size <= 16)); | |
5200 | |
5201 if (mode == BLKmode) | |
5202 return function_arg_vector_value (size, | |
5203 SPARC_FP_ARG_FIRST + 2*slotno); | |
5204 else | |
5205 mclass = MODE_FLOAT; | |
5206 } | |
5207 | |
5208 if (TARGET_ARCH32) | |
5209 return gen_rtx_REG (mode, regno); | |
5210 | |
5211 /* Structures up to 16 bytes in size are passed in arg slots on the stack | |
5212 and are promoted to registers if possible. */ | |
5213 if (type && TREE_CODE (type) == RECORD_TYPE) | |
5214 { | |
5215 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5216 gcc_assert (size <= 16); | |
5217 | |
5218 return function_arg_record_value (type, mode, slotno, named, regbase); | |
5219 } | |
5220 | |
5221 /* Unions up to 16 bytes in size are passed in integer registers. */ | |
5222 else if (type && TREE_CODE (type) == UNION_TYPE) | |
5223 { | |
5224 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5225 gcc_assert (size <= 16); | |
5226 | |
5227 return function_arg_union_value (size, mode, slotno, regno); | |
5228 } | |
5229 | |
5230 /* v9 fp args in reg slots beyond the int reg slots get passed in regs | |
5231 but also have the slot allocated for them. | |
5232 If no prototype is in scope fp values in register slots get passed | |
5233 in two places, either fp regs and int regs or fp regs and memory. */ | |
5234 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) | |
5235 && SPARC_FP_REG_P (regno)) | |
5236 { | |
5237 rtx reg = gen_rtx_REG (mode, regno); | |
5238 if (cum->prototype_p || cum->libcall_p) | |
5239 { | |
5240 /* "* 2" because fp reg numbers are recorded in 4 byte | |
5241 quantities. */ | |
5242 #if 0 | |
5243 /* ??? This will cause the value to be passed in the fp reg and | |
5244 in the stack. When a prototype exists we want to pass the | |
5245 value in the reg but reserve space on the stack. That's an | |
5246 optimization, and is deferred [for a bit]. */ | |
5247 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2) | |
5248 return gen_rtx_PARALLEL (mode, | |
5249 gen_rtvec (2, | |
5250 gen_rtx_EXPR_LIST (VOIDmode, | |
5251 NULL_RTX, const0_rtx), | |
5252 gen_rtx_EXPR_LIST (VOIDmode, | |
5253 reg, const0_rtx))); | |
5254 else | |
5255 #else | |
5256 /* ??? It seems that passing back a register even when past | |
5257 the area declared by REG_PARM_STACK_SPACE will allocate | |
5258 space appropriately, and will not copy the data onto the | |
5259 stack, exactly as we desire. | |
5260 | |
5261 This is due to locate_and_pad_parm being called in | |
5262 expand_call whenever reg_parm_stack_space > 0, which | |
5263 while beneficial to our example here, would seem to be | |
5264 in error from what had been intended. Ho hum... -- r~ */ | |
5265 #endif | |
5266 return reg; | |
5267 } | |
5268 else | |
5269 { | |
5270 rtx v0, v1; | |
5271 | |
5272 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) | |
5273 { | |
5274 int intreg; | |
5275 | |
5276 /* On incoming, we don't need to know that the value | |
5277 is passed in %f0 and %i0, and it confuses other parts | |
5278 causing needless spillage even on the simplest cases. */ | |
5279 if (incoming_p) | |
5280 return reg; | |
5281 | |
5282 intreg = (SPARC_OUTGOING_INT_ARG_FIRST | |
5283 + (regno - SPARC_FP_ARG_FIRST) / 2); | |
5284 | |
5285 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); | |
5286 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), | |
5287 const0_rtx); | |
5288 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); | |
5289 } | |
5290 else | |
5291 { | |
5292 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
5293 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); | |
5294 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); | |
5295 } | |
5296 } | |
5297 } | |
5298 | |
5299 /* All other aggregate types are passed in an integer register in a mode | |
5300 corresponding to the size of the type. */ | |
5301 else if (type && AGGREGATE_TYPE_P (type)) | |
5302 { | |
5303 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5304 gcc_assert (size <= 16); | |
5305 | |
5306 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); | |
5307 } | |
5308 | |
5309 return gen_rtx_REG (mode, regno); | |
5310 } | |
5311 | |
5312 /* For an arg passed partly in registers and partly in memory, | |
5313 this is the number of bytes of registers used. | |
5314 For args passed entirely in registers or entirely in memory, zero. | |
5315 | |
5316 Any arg that starts in the first 6 regs but won't entirely fit in them | |
5317 needs partial registers on v8. On v9, structures with integer | |
5318 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp | |
5319 values that begin in the last fp reg [where "last fp reg" varies with the | |
5320 mode] will be split between that reg and memory. */ | |
5321 | |
5322 static int | |
5323 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, | |
5324 tree type, bool named) | |
5325 { | |
5326 int slotno, regno, padding; | |
5327 | |
5328 /* We pass 0 for incoming_p here; it doesn't matter. */ | |
5329 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding); | |
5330 | |
5331 if (slotno == -1) | |
5332 return 0; | |
5333 | |
5334 if (TARGET_ARCH32) | |
5335 { | |
5336 if ((slotno + (mode == BLKmode | |
5337 ? ROUND_ADVANCE (int_size_in_bytes (type)) | |
5338 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))) | |
5339 > SPARC_INT_ARG_MAX) | |
5340 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD; | |
5341 } | |
5342 else | |
5343 { | |
5344 /* We are guaranteed by pass_by_reference that the size of the | |
5345 argument is not greater than 16 bytes, so we only need to return | |
5346 one word if the argument is partially passed in registers. */ | |
5347 | |
5348 if (type && AGGREGATE_TYPE_P (type)) | |
5349 { | |
5350 int size = int_size_in_bytes (type); | |
5351 | |
5352 if (size > UNITS_PER_WORD | |
5353 && slotno == SPARC_INT_ARG_MAX - 1) | |
5354 return UNITS_PER_WORD; | |
5355 } | |
5356 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT | |
5357 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT | |
5358 && ! (TARGET_FPU && named))) | |
5359 { | |
5360 /* The complex types are passed as packed types. */ | |
5361 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD | |
5362 && slotno == SPARC_INT_ARG_MAX - 1) | |
5363 return UNITS_PER_WORD; | |
5364 } | |
5365 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) | |
5366 { | |
5367 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD) | |
5368 > SPARC_FP_ARG_MAX) | |
5369 return UNITS_PER_WORD; | |
5370 } | |
5371 } | |
5372 | |
5373 return 0; | |
5374 } | |
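
/* A worked example (a sketch, assuming SPARC_INT_ARG_MAX is 6): on v9, a
   16-byte aggregate whose first word lands in the last integer slot
   (slotno == 5) has one word passed in a register and the rest in memory,
   so the routine above reports UNITS_PER_WORD (8) partial bytes.  */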
5375 | |
5376 /* Handle the TARGET_PASS_BY_REFERENCE target hook. | |
5377 Specify whether to pass the argument by reference. */ | |
5378 | |
5379 static bool | |
5380 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, | |
5381 enum machine_mode mode, const_tree type, | |
5382 bool named ATTRIBUTE_UNUSED) | |
5383 { | |
5384 if (TARGET_ARCH32) | |
5385 /* Original SPARC 32-bit ABI says that structures and unions, | |
5386 and quad-precision floats are passed by reference. For Pascal, | |
5387 also pass arrays by reference. All other base types are passed | |
5388 in registers. | |
5389 | |
5390 Extended ABI (as implemented by the Sun compiler) says that all | |
5391 complex floats are passed by reference. Pass complex integers | |
5392 in registers up to 8 bytes. More generally, enforce the 2-word | |
5393 cap for passing arguments in registers. | |
5394 | |
5395 Vector ABI (as implemented by the Sun VIS SDK) says that vector | |
5396 integers are passed like floats of the same size, that is in | |
5397 registers up to 8 bytes. Pass all vector floats by reference | |
5398 like structure and unions. */ | |
5399 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) | |
5400 || mode == SCmode | |
5401 /* Catch CDImode, TFmode, DCmode and TCmode. */ | |
5402 || GET_MODE_SIZE (mode) > 8 | |
5403 || (type | |
5404 && TREE_CODE (type) == VECTOR_TYPE | |
5405 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); | |
5406 else | |
5407 /* Original SPARC 64-bit ABI says that structures and unions | |
5408 up to 16 bytes are passed in registers, as well as | |
5409 all other base types. | |
5410 | |
5411 Extended ABI (as implemented by the Sun compiler) says that | |
5412 complex floats are passed in registers up to 16 bytes. Pass | |
5413 all complex integers in registers up to 16 bytes. More generally, | |
5414 enforce the 2-word cap for passing arguments in registers. | |
5415 | |
5416 Vector ABI (as implemented by the Sun VIS SDK) says that vector | |
5417 integers are passed like floats of the same size, that is in | |
5418 registers (up to 16 bytes). Pass all vector floats like structure | |
5419 and unions. */ | |
5420 return ((type | |
5421 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE) | |
5422 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) | |
5423 /* Catch CTImode and TCmode. */ | |
5424 || GET_MODE_SIZE (mode) > 16); | |
5425 } | |
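
/* For illustration (a sketch, assuming the usual type-to-mode mapping),
   the rules above classify a few C types as follows:

       type                          32-bit ABI        64-bit ABI
       struct { int a, b; }    (8)   by reference      directly
       long double (TFmode)   (16)   by reference      directly
       _Complex float          (8)   by reference      directly
       _Complex double        (16)   by reference      directly
       long long               (8)   directly          directly
       struct { char c[24]; }        by reference      by reference  */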
5426 | |
5427 /* Handle the FUNCTION_ARG_ADVANCE macro. | |
5428 Update the data in CUM to advance over an argument | |
5429 of mode MODE and data type TYPE. | |
5430 TYPE is null for libcalls where that information may not be available. */ | |
5431 | |
5432 void | |
5433 function_arg_advance (struct sparc_args *cum, enum machine_mode mode, | |
5434 tree type, int named) | |
5435 { | |
5436 int slotno, regno, padding; | |
5437 | |
5438 /* We pass 0 for incoming_p here; it doesn't matter. */ | |
5439 slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding); | |
5440 | |
5441 /* If the argument required leading padding, add it. */ | |
5442 if (slotno != -1) | |
5443 cum->words += padding; | |
5444 | |
5445 if (TARGET_ARCH32) | |
5446 { | |
5447 cum->words += (mode != BLKmode | |
5448 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) | |
5449 : ROUND_ADVANCE (int_size_in_bytes (type))); | |
5450 } | |
5451 else | |
5452 { | |
5453 if (type && AGGREGATE_TYPE_P (type)) | |
5454 { | |
5455 int size = int_size_in_bytes (type); | |
5456 | |
5457 if (size <= 8) | |
5458 ++cum->words; | |
5459 else if (size <= 16) | |
5460 cum->words += 2; | |
5461 else /* passed by reference */ | |
5462 ++cum->words; | |
5463 } | |
5464 else | |
5465 { | |
5466 cum->words += (mode != BLKmode | |
5467 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) | |
5468 : ROUND_ADVANCE (int_size_in_bytes (type))); | |
5469 } | |
5470 } | |
5471 } | |
5472 | |
5473 /* Handle the FUNCTION_ARG_PADDING macro. | |
5474 For the 64 bit ABI structs are always stored left shifted in their | |
5475 argument slot. */ | |
5476 | |
5477 enum direction | |
5478 function_arg_padding (enum machine_mode mode, const_tree type) | |
5479 { | |
5480 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type)) | |
5481 return upward; | |
5482 | |
5483 /* Fall back to the default. */ | |
5484 return DEFAULT_FUNCTION_ARG_PADDING (mode, type); | |
5485 } | |
5486 | |
5487 /* Handle the TARGET_RETURN_IN_MEMORY target hook. | |
5488 Specify whether to return the return value in memory. */ | |
5489 | |
5490 static bool | |
5491 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) | |
5492 { | |
5493 if (TARGET_ARCH32) | |
5494 /* Original SPARC 32-bit ABI says that structures and unions, | |
5495 and quad-precision floats are returned in memory. All other | |
5496 base types are returned in registers. | |
5497 | |
5498 Extended ABI (as implemented by the Sun compiler) says that | |
5499 all complex floats are returned in registers (8 FP registers | |
5500 at most for '_Complex long double'). Return all complex integers | |
5501 in registers (4 at most for '_Complex long long'). | |
5502 | |
5503 Vector ABI (as implemented by the Sun VIS SDK) says that vector | |
5504 integers are returned like floats of the same size, that is in | |
5505 registers up to 8 bytes and in memory otherwise. Return all | |
5506 vector floats in memory like structure and unions; note that | |
5507 they always have BLKmode like the latter. */ | |
5508 return (TYPE_MODE (type) == BLKmode | |
5509 || TYPE_MODE (type) == TFmode | |
5510 || (TREE_CODE (type) == VECTOR_TYPE | |
5511 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); | |
5512 else | |
5513 /* Original SPARC 64-bit ABI says that structures and unions | |
5514 up to 32 bytes are returned in registers, as well as | |
5515 all other base types. | |
5516 | |
5517 Extended ABI (as implemented by the Sun compiler) says that all | |
5518 complex floats are returned in registers (8 FP registers at most | |
5519 for '_Complex long double'). Return all complex integers in | |
5520 registers (4 at most for '_Complex TItype'). | |
5521 | |
5522 Vector ABI (as implemented by the Sun VIS SDK) says that vector | |
5523 integers are returned like floats of the same size, that is in | |
5524 registers. Return all vector floats like structure and unions; | |
5525 note that they always have BLKmode like the latter. */ | |
5526 return ((TYPE_MODE (type) == BLKmode | |
5527 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32)); | |
5528 } | |
5529 | |
5530 /* Handle the TARGET_STRUCT_VALUE target hook. | |
5531 Return where to find the structure return value address. */ | |
5532 | |
5533 static rtx | |
5534 sparc_struct_value_rtx (tree fndecl, int incoming) | |
5535 { | |
5536 if (TARGET_ARCH64) | |
5537 return 0; | |
5538 else | |
5539 { | |
5540 rtx mem; | |
5541 | |
5542 if (incoming) | |
5543 mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx, | |
5544 STRUCT_VALUE_OFFSET)); | |
5545 else | |
5546 mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, | |
5547 STRUCT_VALUE_OFFSET)); | |
5548 | |
5549 /* Only follow the SPARC ABI for fixed-size structure returns. | |
5550 Variable size structure returns are handled per the normal | |
5551 procedures in GCC. This is enabled by -mstd-struct-return. */ | |
5552 if (incoming == 2 | |
5553 && sparc_std_struct_return | |
5554 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) | |
5555 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) | |
5556 { | |
5557 /* We must check and adjust the return address, since the | |
5558 caller may or may not have actually provided the return | |
5559 object. */ | |
5560 rtx ret_rtx = gen_rtx_REG (Pmode, 31); | |
5561 rtx scratch = gen_reg_rtx (SImode); | |
5562 rtx endlab = gen_label_rtx (); | |
5563 | |
5564 /* Calculate the return object size. */ | |
5565 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); | |
5566 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); | |
5567 /* Construct a temporary return value. */ | |
5568 rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); | |
5569 | |
5570 /* Implement the SPARC 32-bit psABI callee-side struct return checking | |
5571 requirements: | |
5572 | |
5573 Fetch the instruction where we will return to and see if | |
5574 it's an unimp instruction (the most significant 10 bits | |
5575 will be zero). */ | |
5576 emit_move_insn (scratch, gen_rtx_MEM (SImode, | |
5577 plus_constant (ret_rtx, 8))); | |
5578 /* Assume the size is valid and pre-adjust. */ | |
5579 emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4))); | |
5580 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab); | |
5581 emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4))); | |
5582 /* Assign stack temp: | |
5583 Write the address of the memory pointed to by temp_val into | |
5584 the memory pointed to by mem. */ | |
5585 emit_move_insn (mem, XEXP (temp_val, 0)); | |
5586 emit_label (endlab); | |
5587 } | |
5588 | |
5589 set_mem_alias_set (mem, struct_value_alias_set); | |
5590 return mem; | |
5591 } | |
5592 } | |
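
/* For illustration (a sketch): the check above implements the 32-bit
   psABI struct-return convention in which the caller plants an "unimp"
   word carrying the expected struct size after the call's delay slot:

       call  foo
        nop               ! delay slot
       unimp 12           ! low bits = size of the returned struct

   If the word at %i7+8 matches the callee's own return-object size, the
   return address is bumped by 4 so the ret skips the unimp word;
   otherwise it is left alone and the return object is redirected into a
   stack temporary local to the callee.  */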
5593 | |
5594 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros. | |
5595 For v9, function return values are subject to the same rules as arguments, | |
5596 except that up to 32 bytes may be returned in registers. */ | |
5597 | |
5598 rtx | |
5599 function_value (const_tree type, enum machine_mode mode, int incoming_p) | |
5600 { | |
5601 /* Beware that the two values are swapped here wrt function_arg. */ | |
5602 int regbase = (incoming_p | |
5603 ? SPARC_OUTGOING_INT_ARG_FIRST | |
5604 : SPARC_INCOMING_INT_ARG_FIRST); | |
5605 enum mode_class mclass = GET_MODE_CLASS (mode); | |
5606 int regno; | |
5607 | |
5608 /* Vector types deserve special treatment because they are polymorphic wrt | |
5609 their mode, depending upon whether VIS instructions are enabled. */ | |
5610 if (type && TREE_CODE (type) == VECTOR_TYPE) | |
5611 { | |
5612 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5613 gcc_assert ((TARGET_ARCH32 && size <= 8) | |
5614 || (TARGET_ARCH64 && size <= 32)); | |
5615 | |
5616 if (mode == BLKmode) | |
5617 return function_arg_vector_value (size, | |
5618 SPARC_FP_ARG_FIRST); | |
5619 else | |
5620 mclass = MODE_FLOAT; | |
5621 } | |
5622 | |
5623 if (TARGET_ARCH64 && type) | |
5624 { | |
5625 /* Structures up to 32 bytes in size are returned in registers. */ | |
5626 if (TREE_CODE (type) == RECORD_TYPE) | |
5627 { | |
5628 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5629 gcc_assert (size <= 32); | |
5630 | |
5631 return function_arg_record_value (type, mode, 0, 1, regbase); | |
5632 } | |
5633 | |
5634 /* Unions up to 32 bytes in size are returned in integer registers. */ | |
5635 else if (TREE_CODE (type) == UNION_TYPE) | |
5636 { | |
5637 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5638 gcc_assert (size <= 32); | |
5639 | |
5640 return function_arg_union_value (size, mode, 0, regbase); | |
5641 } | |
5642 | |
5643 /* Objects that require it are returned in FP registers. */ | |
5644 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) | |
5645 ; | |
5646 | |
5647 /* All other aggregate types are returned in an integer register in a | |
5648 mode corresponding to the size of the type. */ | |
5649 else if (AGGREGATE_TYPE_P (type)) | |
5650 { | |
5651 /* All other aggregate types are passed in an integer register | |
5652 in a mode corresponding to the size of the type. */ | |
5653 HOST_WIDE_INT size = int_size_in_bytes (type); | |
5654 gcc_assert (size <= 32); | |
5655 | |
5656 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); | |
5657 | |
5658 /* ??? We probably should have made the same ABI change in | |
5659 3.4.0 as the one we made for unions. The latter was | |
5660 required by the SCD though, while the former is not | |
5661 specified, so we favored compatibility and efficiency. | |
5662 | |
5663 Now we're stuck for aggregates larger than 16 bytes, | |
5664 because OImode vanished in the meantime. Let's not | |
5665 try to be unduly clever, and simply follow the ABI | |
5666 for unions in that case. */ | |
5667 if (mode == BLKmode) | |
5668 return function_arg_union_value (size, mode, 0, regbase); | |
5669 else | |
5670 mclass = MODE_INT; | |
5671 } | |
5672 | |
5673 /* This must match PROMOTE_FUNCTION_MODE. */ | |
5674 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) | |
5675 mode = word_mode; | |
5676 } | |
5677 | |
5678 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) | |
5679 regno = SPARC_FP_ARG_FIRST; | |
5680 else | |
5681 regno = regbase; | |
5682 | |
5683 return gen_rtx_REG (mode, regno); | |
5684 } | |
5685 | |
5686 /* Do what is necessary for `va_start'. We look at the current function | |
5687 to determine if stdarg or varargs is used and return the address of | |
5688 the first unnamed parameter. */ | |
5689 | |
5690 static rtx | |
5691 sparc_builtin_saveregs (void) | |
5692 { | |
5693 int first_reg = crtl->args.info.words; | |
5694 rtx address; | |
5695 int regno; | |
5696 | |
5697 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) | |
5698 emit_move_insn (gen_rtx_MEM (word_mode, | |
5699 gen_rtx_PLUS (Pmode, | |
5700 frame_pointer_rtx, | |
5701 GEN_INT (FIRST_PARM_OFFSET (0) | |
5702 + (UNITS_PER_WORD | |
5703 * regno)))), | |
5704 gen_rtx_REG (word_mode, | |
5705 SPARC_INCOMING_INT_ARG_FIRST + regno)); | |
5706 | |
5707 address = gen_rtx_PLUS (Pmode, | |
5708 frame_pointer_rtx, | |
5709 GEN_INT (FIRST_PARM_OFFSET (0) | |
5710 + UNITS_PER_WORD * first_reg)); | |
5711 | |
5712 return address; | |
5713 } | |
5714 | |
5715 /* Implement `va_start' for stdarg. */ | |
5716 | |
5717 static void | |
5718 sparc_va_start (tree valist, rtx nextarg) | |
5719 { | |
5720 nextarg = expand_builtin_saveregs (); | |
5721 std_expand_builtin_va_start (valist, nextarg); | |
5722 } | |
5723 | |
5724 /* Implement `va_arg' for stdarg. */ | |
5725 | |
5726 static tree | |
5727 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, | |
5728 gimple_seq *post_p) | |
5729 { | |
5730 HOST_WIDE_INT size, rsize, align; | |
5731 tree addr, incr; | |
5732 bool indirect; | |
5733 tree ptrtype = build_pointer_type (type); | |
5734 | |
5735 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) | |
5736 { | |
5737 indirect = true; | |
5738 size = rsize = UNITS_PER_WORD; | |
5739 align = 0; | |
5740 } | |
5741 else | |
5742 { | |
5743 indirect = false; | |
5744 size = int_size_in_bytes (type); | |
5745 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; | |
5746 align = 0; | |
5747 | |
5748 if (TARGET_ARCH64) | |
5749 { | |
5750 /* For SPARC64, objects requiring 16-byte alignment get it. */ | |
5751 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) | |
5752 align = 2 * UNITS_PER_WORD; | |
5753 | |
5754 /* SPARC-V9 ABI states that structures up to 16 bytes in size | |
5755 are left-justified in their slots. */ | |
5756 if (AGGREGATE_TYPE_P (type)) | |
5757 { | |
5758 if (size == 0) | |
5759 size = rsize = UNITS_PER_WORD; | |
5760 else | |
5761 size = rsize; | |
5762 } | |
5763 } | |
5764 } | |
5765 | |
5766 incr = valist; | |
5767 if (align) | |
5768 { | |
5769 incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, | |
5770 size_int (align - 1)); | |
5771 incr = fold_convert (sizetype, incr); | |
5772 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, | |
5773 size_int (-align)); | |
5774 incr = fold_convert (ptr_type_node, incr); | |
5775 } | |
5776 | |
5777 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); | |
5778 addr = incr; | |
5779 | |
5780 if (BYTES_BIG_ENDIAN && size < rsize) | |
5781 addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, | |
5782 size_int (rsize - size)); | |
5783 | |
5784 if (indirect) | |
5785 { | |
5786 addr = fold_convert (build_pointer_type (ptrtype), addr); | |
5787 addr = build_va_arg_indirect_ref (addr); | |
5788 } | |
5789 /* If the address isn't aligned properly for the type, | |
5790 we may need to copy to a temporary. | |
5791 FIXME: This is inefficient. Usually we can do this | |
5792 in registers. */ | |
5793 else if (align == 0 | |
5794 && TYPE_ALIGN (type) > BITS_PER_WORD) | |
5795 { | |
5796 tree tmp = create_tmp_var (type, "va_arg_tmp"); | |
5797 tree dest_addr = build_fold_addr_expr (tmp); | |
5798 | |
5799 tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY], 3, | |
5800 dest_addr, | |
5801 addr, | |
5802 size_int (rsize)); | |
5803 | |
5804 gimplify_and_add (copy, pre_p); | |
5805 addr = dest_addr; | |
5806 } | |
5807 else | |
5808 addr = fold_convert (ptrtype, addr); | |
5809 | |
5810 incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize)); | |
5811 gimplify_assign (valist, incr, post_p); | |
5812 | |
5813 return build_va_arg_indirect_ref (addr); | |
5814 } | |
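
/* A minimal C sketch of the alignment step used above, illustrative only
   (the helper name align_up is made up):  */
#if 0
static unsigned long
align_up (unsigned long addr, unsigned long align)
{
  /* Round ADDR up to the next multiple of ALIGN, a power of two;
     this mirrors "incr = (incr + align - 1) & -align" above.  */
  return (addr + align - 1) & -align;
}
#endif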
5815 | |
5816 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. | |
5817 Specify whether the vector mode is supported by the hardware. */ | |
5818 | |
5819 static bool | |
5820 sparc_vector_mode_supported_p (enum machine_mode mode) | |
5821 { | |
5822 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; | |
5823 } | |
5824 | |
5825 /* Return the string to output an unconditional branch to LABEL, which is | |
5826 the operand number of the label. | |
5827 | |
5828 DEST is the destination insn (i.e. the label), INSN is the source. */ | |
5829 | |
5830 const char * | |
5831 output_ubranch (rtx dest, int label, rtx insn) | |
5832 { | |
5833 static char string[64]; | |
5834 bool v9_form = false; | |
5835 char *p; | |
5836 | |
5837 if (TARGET_V9 && INSN_ADDRESSES_SET_P ()) | |
5838 { | |
5839 int delta = (INSN_ADDRESSES (INSN_UID (dest)) | |
5840 - INSN_ADDRESSES (INSN_UID (insn))); | |
5841 /* Leave some instructions for "slop". */ | |
5842 if (delta >= -260000 && delta < 260000) | |
5843 v9_form = true; | |
5844 } | |
5845 | |
5846 if (v9_form) | |
5847 strcpy (string, "ba%*,pt\t%%xcc, "); | |
5848 else | |
5849 strcpy (string, "b%*\t"); | |
5850 | |
5851 p = strchr (string, '\0'); | |
5852 *p++ = '%'; | |
5853 *p++ = 'l'; | |
5854 *p++ = '0' + label; | |
5855 *p++ = '%'; | |
5856 *p++ = '('; | |
5857 *p = '\0'; | |
5858 | |
5859 return string; | |
5860 } | |
5861 | |
5862 /* Return the string to output a conditional branch to LABEL, which is | |
5863 the operand number of the label. OP is the conditional expression. | |
5864 XEXP (OP, 0) is assumed to be a condition code register (integer or | |
5865 floating point) and its mode specifies what kind of comparison we made. | |
5866 | |
5867 DEST is the destination insn (i.e. the label), INSN is the source. | |
5868 | |
5869 REVERSED is nonzero if we should reverse the sense of the comparison. | |
5870 | |
5871 ANNUL is nonzero if we should generate an annulling branch. */ | |
5872 | |
5873 const char * | |
5874 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, | |
5875 rtx insn) | |
5876 { | |
5877 static char string[64]; | |
5878 enum rtx_code code = GET_CODE (op); | |
5879 rtx cc_reg = XEXP (op, 0); | |
5880 enum machine_mode mode = GET_MODE (cc_reg); | |
5881 const char *labelno, *branch; | |
5882 int spaces = 8, far; | |
5883 char *p; | |
5884 | |
5885 /* v9 branches are limited to +-1MB. If it is too far away, | |
5886 change | |
5887 | |
5888 bne,pt %xcc, .LC30 | |
5889 | |
5890 to | |
5891 | |
5892 be,pn %xcc, .+12 | |
5893 nop | |
5894 ba .LC30 | |
5895 | |
5896 and | |
5897 | |
5898 fbne,a,pn %fcc2, .LC29 | |
5899 | |
5900 to | |
5901 | |
5902 fbe,pt %fcc2, .+16 | |
5903 nop | |
5904 ba .LC29 */ | |
5905 | |
5906 far = TARGET_V9 && (get_attr_length (insn) >= 3); | |
5907 if (reversed ^ far) | |
5908 { | |
5909 /* Reversal of FP compares takes care -- an ordered compare | |
5910 becomes an unordered compare and vice versa. */ | |
5911 if (mode == CCFPmode || mode == CCFPEmode) | |
5912 code = reverse_condition_maybe_unordered (code); | |
5913 else | |
5914 code = reverse_condition (code); | |
5915 } | |
5916 | |
5917 /* Start by writing the branch condition. */ | |
5918 if (mode == CCFPmode || mode == CCFPEmode) | |
5919 { | |
5920 switch (code) | |
5921 { | |
5922 case NE: | |
5923 branch = "fbne"; | |
5924 break; | |
5925 case EQ: | |
5926 branch = "fbe"; | |
5927 break; | |
5928 case GE: | |
5929 branch = "fbge"; | |
5930 break; | |
5931 case GT: | |
5932 branch = "fbg"; | |
5933 break; | |
5934 case LE: | |
5935 branch = "fble"; | |
5936 break; | |
5937 case LT: | |
5938 branch = "fbl"; | |
5939 break; | |
5940 case UNORDERED: | |
5941 branch = "fbu"; | |
5942 break; | |
5943 case ORDERED: | |
5944 branch = "fbo"; | |
5945 break; | |
5946 case UNGT: | |
5947 branch = "fbug"; | |
5948 break; | |
5949 case UNLT: | |
5950 branch = "fbul"; | |
5951 break; | |
5952 case UNEQ: | |
5953 branch = "fbue"; | |
5954 break; | |
5955 case UNGE: | |
5956 branch = "fbuge"; | |
5957 break; | |
5958 case UNLE: | |
5959 branch = "fbule"; | |
5960 break; | |
5961 case LTGT: | |
5962 branch = "fblg"; | |
5963 break; | |
5964 | |
5965 default: | |
5966 gcc_unreachable (); | |
5967 } | |
5968 | |
5969 /* ??? !v9: FP branches cannot be preceded by another floating point | |
5970 insn. Because there is currently no concept of pre-delay slots, | |
5971 we can fix this only by always emitting a nop before a floating | |
5972 point branch. */ | |
5973 | |
5974 string[0] = '\0'; | |
5975 if (! TARGET_V9) | |
5976 strcpy (string, "nop\n\t"); | |
5977 strcat (string, branch); | |
5978 } | |
5979 else | |
5980 { | |
5981 switch (code) | |
5982 { | |
5983 case NE: | |
5984 branch = "bne"; | |
5985 break; | |
5986 case EQ: | |
5987 branch = "be"; | |
5988 break; | |
5989 case GE: | |
5990 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) | |
5991 branch = "bpos"; | |
5992 else | |
5993 branch = "bge"; | |
5994 break; | |
5995 case GT: | |
5996 branch = "bg"; | |
5997 break; | |
5998 case LE: | |
5999 branch = "ble"; | |
6000 break; | |
6001 case LT: | |
6002 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) | |
6003 branch = "bneg"; | |
6004 else | |
6005 branch = "bl"; | |
6006 break; | |
6007 case GEU: | |
6008 branch = "bgeu"; | |
6009 break; | |
6010 case GTU: | |
6011 branch = "bgu"; | |
6012 break; | |
6013 case LEU: | |
6014 branch = "bleu"; | |
6015 break; | |
6016 case LTU: | |
6017 branch = "blu"; | |
6018 break; | |
6019 | |
6020 default: | |
6021 gcc_unreachable (); | |
6022 } | |
6023 strcpy (string, branch); | |
6024 } | |
6025 spaces -= strlen (branch); | |
6026 p = strchr (string, '\0'); | |
6027 | |
6028 /* Now add the annulling, the label, and a possible noop. */ | |
6029 if (annul && ! far) | |
6030 { | |
6031 strcpy (p, ",a"); | |
6032 p += 2; | |
6033 spaces -= 2; | |
6034 } | |
6035 | |
6036 if (TARGET_V9) | |
6037 { | |
6038 rtx note; | |
6039 int v8 = 0; | |
6040 | |
6041 if (! far && insn && INSN_ADDRESSES_SET_P ()) | |
6042 { | |
6043 int delta = (INSN_ADDRESSES (INSN_UID (dest)) | |
6044 - INSN_ADDRESSES (INSN_UID (insn))); | |
6045 /* Leave some instructions for "slop". */ | |
6046 if (delta < -260000 || delta >= 260000) | |
6047 v8 = 1; | |
6048 } | |
6049 | |
6050 if (mode == CCFPmode || mode == CCFPEmode) | |
6051 { | |
6052 static char v9_fcc_labelno[] = "%%fccX, "; | |
6053 /* Set the char indicating the number of the fcc reg to use. */ | |
6054 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; | |
6055 labelno = v9_fcc_labelno; | |
6056 if (v8) | |
6057 { | |
6058 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); | |
6059 labelno = ""; | |
6060 } | |
6061 } | |
6062 else if (mode == CCXmode || mode == CCX_NOOVmode) | |
6063 { | |
6064 labelno = "%%xcc, "; | |
6065 gcc_assert (! v8); | |
6066 } | |
6067 else | |
6068 { | |
6069 labelno = "%%icc, "; | |
6070 if (v8) | |
6071 labelno = ""; | |
6072 } | |
6073 | |
6074 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) | |
6075 { | |
6076 strcpy (p, | |
6077 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) | |
6078 ? ",pt" : ",pn"); | |
6079 p += 3; | |
6080 spaces -= 3; | |
6081 } | |
6082 } | |
6083 else | |
6084 labelno = ""; | |
6085 | |
6086 if (spaces > 0) | |
6087 *p++ = '\t'; | |
6088 else | |
6089 *p++ = ' '; | |
6090 strcpy (p, labelno); | |
6091 p = strchr (p, '\0'); | |
6092 if (far) | |
6093 { | |
6094 strcpy (p, ".+12\n\t nop\n\tb\t"); | |
6095 /* Skip the next insn if requested or | |
6096 if we know that it will be a nop. */ | |
6097 if (annul || ! final_sequence) | |
6098 p[3] = '6'; | |
6099 p += 14; | |
6100 } | |
6101 *p++ = '%'; | |
6102 *p++ = 'l'; | |
6103 *p++ = label + '0'; | |
6104 *p++ = '%'; | |
6105 *p++ = '#'; | |
6106 *p = '\0'; | |
6107 | |
6108 return string; | |
6109 } | |
6110 | |
6111 /* Emit a library call comparison between floating point X and Y. | |
6112 COMPARISON is the operator to compare with (EQ, NE, GT, etc). | |
6113 Return the new operator to be used in the comparison sequence. | |
6114 | |
6115 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode | |
6116 values as arguments instead of the TFmode registers themselves, | |
6117 that's why we cannot call emit_float_lib_cmp. */ | |
6118 | |
6119 enum rtx_code | |
6120 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) | |
6121 { | |
6122 const char *qpfunc; | |
6123 rtx slot0, slot1, result, tem, tem2; | |
6124 enum machine_mode mode; | |
6125 enum rtx_code new_comparison; | |
6126 | |
6127 switch (comparison) | |
6128 { | |
6129 case EQ: | |
6130 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); | |
6131 break; | |
6132 | |
6133 case NE: | |
6134 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); | |
6135 break; | |
6136 | |
6137 case GT: | |
6138 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); | |
6139 break; | |
6140 | |
6141 case GE: | |
6142 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); | |
6143 break; | |
6144 | |
6145 case LT: | |
6146 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); | |
6147 break; | |
6148 | |
6149 case LE: | |
6150 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); | |
6151 break; | |
6152 | |
6153 case ORDERED: | |
6154 case UNORDERED: | |
6155 case UNGT: | |
6156 case UNLT: | |
6157 case UNEQ: | |
6158 case UNGE: | |
6159 case UNLE: | |
6160 case LTGT: | |
6161 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp"); | |
6162 break; | |
6163 | |
6164 default: | |
6165 gcc_unreachable (); | |
6166 } | |
6167 | |
6168 if (TARGET_ARCH64) | |
6169 { | |
6170 if (MEM_P (x)) | |
6171 slot0 = x; | |
6172 else | |
6173 { | |
6174 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0); | |
6175 emit_move_insn (slot0, x); | |
6176 } | |
6177 | |
6178 if (MEM_P (y)) | |
6179 slot1 = y; | |
6180 else | |
6181 { | |
6182 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0); | |
6183 emit_move_insn (slot1, y); | |
6184 } | |
6185 | |
6186 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL, | |
6187 DImode, 2, | |
6188 XEXP (slot0, 0), Pmode, | |
6189 XEXP (slot1, 0), Pmode); | |
6190 mode = DImode; | |
6191 } | |
6192 else | |
6193 { | |
6194 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL, | |
6195 SImode, 2, | |
6196 x, TFmode, y, TFmode); | |
6197 mode = SImode; | |
6198 } | |
6199 | |
6200 | |
6201 /* Immediately move the result of the libcall into a pseudo | |
6202 register so reload doesn't clobber the value if it needs | |
6203 the return register for a spill reg. */ | |
6204 result = gen_reg_rtx (mode); | |
6205 emit_move_insn (result, hard_libcall_value (mode)); | |
6206 | |
6207 switch (comparison) | |
6208 { | |
6209 default: | |
6210 new_comparison = NE; | |
6211 emit_cmp_insn (result, const0_rtx, new_comparison, NULL_RTX, mode, 0); | |
6212 break; | |
6213 case ORDERED: | |
6214 case UNORDERED: | |
6215 new_comparison = (comparison == UNORDERED ? EQ : NE); | |
6216 emit_cmp_insn (result, GEN_INT(3), new_comparison, NULL_RTX, mode, 0); | |
6217 break; | |
6218 case UNGT: | |
6219 case UNGE: | |
6220 new_comparison = (comparison == UNGT ? GT : NE); | |
6221 emit_cmp_insn (result, const1_rtx, new_comparison, NULL_RTX, mode, 0); | |
6222 break; | |
6223 case UNLE: | |
6224 new_comparison = NE; | |
6225 emit_cmp_insn (result, const2_rtx, new_comparison, NULL_RTX, mode, 0); | |
6226 break; | |
6227 case UNLT: | |
6228 tem = gen_reg_rtx (mode); | |
6229 if (TARGET_ARCH32) | |
6230 emit_insn (gen_andsi3 (tem, result, const1_rtx)); | |
6231 else | |
6232 emit_insn (gen_anddi3 (tem, result, const1_rtx)); | |
6233 new_comparison = NE; | |
6234 emit_cmp_insn (tem, const0_rtx, new_comparison, NULL_RTX, mode, 0); | |
6235 break; | |
6236 case UNEQ: | |
6237 case LTGT: | |
6238 tem = gen_reg_rtx (mode); | |
6239 if (TARGET_ARCH32) | |
6240 emit_insn (gen_addsi3 (tem, result, const1_rtx)); | |
6241 else | |
6242 emit_insn (gen_adddi3 (tem, result, const1_rtx)); | |
6243 tem2 = gen_reg_rtx (mode); | |
6244 if (TARGET_ARCH32) | |
6245 emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); | |
6246 else | |
6247 emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); | |
6248 new_comparison = (comparison == UNEQ ? EQ : NE); | |
6249 emit_cmp_insn (tem2, const0_rtx, new_comparison, NULL_RTX, mode, 0); | |
6250 break; | |
6251 } | |
6252 | |
6253 return new_comparison; | |
6254 } | |
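
/* The mappings above rely on the conventional _Q_cmp/_Qp_cmp result
   encoding (0 = equal, 1 = less, 2 = greater, 3 = unordered).  Assuming
   that encoding, the unordered cases decode as follows:

       UNORDERED  ->  result == 3
       ORDERED    ->  result != 3
       UNGT       ->  result >  1              (greater or unordered)
       UNGE       ->  result != 1              (anything but less)
       UNLE       ->  result != 2              (anything but greater)
       UNLT       ->  (result & 1) != 0        (less or unordered)
       UNEQ       ->  ((result + 1) & 2) == 0  (equal or unordered)
       LTGT       ->  ((result + 1) & 2) != 0  (less or greater)  */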
6255 | |
6256 /* Generate an unsigned DImode to FP conversion. This is the same code | |
6257 optabs would emit if we didn't have TFmode patterns. */ | |
6258 | |
6259 void | |
6260 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode) | |
6261 { | |
6262 rtx neglab, donelab, i0, i1, f0, in, out; | |
6263 | |
6264 out = operands[0]; | |
6265 in = force_reg (DImode, operands[1]); | |
6266 neglab = gen_label_rtx (); | |
6267 donelab = gen_label_rtx (); | |
6268 i0 = gen_reg_rtx (DImode); | |
6269 i1 = gen_reg_rtx (DImode); | |
6270 f0 = gen_reg_rtx (mode); | |
6271 | |
6272 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); | |
6273 | |
6274 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); | |
6275 emit_jump_insn (gen_jump (donelab)); | |
6276 emit_barrier (); | |
6277 | |
6278 emit_label (neglab); | |
6279 | |
6280 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); | |
6281 emit_insn (gen_anddi3 (i1, in, const1_rtx)); | |
6282 emit_insn (gen_iordi3 (i0, i0, i1)); | |
6283 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); | |
6284 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); | |
6285 | |
6286 emit_label (donelab); | |
6287 } | |
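
/* A minimal C-level sketch of the conversion trick emitted above,
   illustrative only (the helper name u64_to_double is made up):  */
#if 0
static double
u64_to_double (unsigned long long x)
{
  unsigned long long half;
  double d;

  if ((long long) x >= 0)
    /* Fits in a signed 64-bit value: convert directly.  */
    return (double) (long long) x;

  /* Halve the value, folding the discarded low bit back in so that
     rounding is unaffected, convert as signed, then double the result.  */
  half = (x >> 1) | (x & 1);
  d = (double) (long long) half;
  return d + d;
}
#endif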
6288 | |
6289 /* Generate an FP to unsigned DImode conversion. This is the same code | |
6290 optabs would emit if we didn't have TFmode patterns. */ | |
6291 | |
6292 void | |
6293 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode) | |
6294 { | |
6295 rtx neglab, donelab, i0, i1, f0, in, out, limit; | |
6296 | |
6297 out = operands[0]; | |
6298 in = force_reg (mode, operands[1]); | |
6299 neglab = gen_label_rtx (); | |
6300 donelab = gen_label_rtx (); | |
6301 i0 = gen_reg_rtx (DImode); | |
6302 i1 = gen_reg_rtx (DImode); | |
6303 limit = gen_reg_rtx (mode); | |
6304 f0 = gen_reg_rtx (mode); | |
6305 | |
6306 emit_move_insn (limit, | |
6307 CONST_DOUBLE_FROM_REAL_VALUE ( | |
6308 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); | |
6309 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); | |
6310 | |
6311 emit_insn (gen_rtx_SET (VOIDmode, | |
6312 out, | |
6313 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); | |
6314 emit_jump_insn (gen_jump (donelab)); | |
6315 emit_barrier (); | |
6316 | |
6317 emit_label (neglab); | |
6318 | |
6319 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit))); | |
6320 emit_insn (gen_rtx_SET (VOIDmode, | |
6321 i0, | |
6322 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); | |
6323 emit_insn (gen_movdi (i1, const1_rtx)); | |
6324 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); | |
6325 emit_insn (gen_xordi3 (out, i0, i1)); | |
6326 | |
6327 emit_label (donelab); | |
6328 } | |
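
/* The matching C-level sketch for the FP -> unsigned conversion above,
   illustrative only (the helper name double_to_u64 is made up):  */
#if 0
static unsigned long long
double_to_u64 (double x)
{
  const double limit = 9223372036854775808.0;  /* 2^63 */

  if (x < limit)
    /* Representable as a signed 64-bit value: convert directly.  */
    return (unsigned long long) (long long) x;

  /* Bring the value below 2^63, convert, then restore the top bit.  */
  return (unsigned long long) (long long) (x - limit) ^ (1ULL << 63);
}
#endif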
6329 | |
6330 /* Return the string to output a conditional branch to LABEL, testing | |
6331 register REG. LABEL is the operand number of the label; REG is the | |
6332 operand number of the reg. OP is the conditional expression. The mode | |
6333 of REG says what kind of comparison we made. | |
6334 | |
6335 DEST is the destination insn (i.e. the label), INSN is the source. | |
6336 | |
6337 REVERSED is nonzero if we should reverse the sense of the comparison. | |
6338 | |
6339 ANNUL is nonzero if we should generate an annulling branch. */ | |
6340 | |
6341 const char * | |
6342 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed, | |
6343 int annul, rtx insn) | |
6344 { | |
6345 static char string[64]; | |
6346 enum rtx_code code = GET_CODE (op); | |
6347 enum machine_mode mode = GET_MODE (XEXP (op, 0)); | |
6348 rtx note; | |
6349 int far; | |
6350 char *p; | |
6351 | |
6352 /* Branch-on-register instructions are limited to +-128KB. If it is too far away, | |
6353 change | |
6354 | |
6355 brnz,pt %g1, .LC30 | |
6356 | |
6357 to | |
6358 | |
6359 brz,pn %g1, .+12 | |
6360 nop | |
6361 ba,pt %xcc, .LC30 | |
6362 | |
6363 and | |
6364 | |
6365 brgez,a,pn %o1, .LC29 | |
6366 | |
6367 to | |
6368 | |
6369 brlz,pt %o1, .+16 | |
6370 nop | |
6371 ba,pt %xcc, .LC29 */ | |
6372 | |
6373 far = get_attr_length (insn) >= 3; | |
6374 | |
6375 /* If not floating-point or if EQ or NE, we can just reverse the code. */ | |
6376 if (reversed ^ far) | |
6377 code = reverse_condition (code); | |
6378 | |
6379 /* Only 64-bit versions of these instructions exist. */ | |
6380 gcc_assert (mode == DImode); | |
6381 | |
6382 /* Start by writing the branch condition. */ | |
6383 | |
6384 switch (code) | |
6385 { | |
6386 case NE: | |
6387 strcpy (string, "brnz"); | |
6388 break; | |
6389 | |
6390 case EQ: | |
6391 strcpy (string, "brz"); | |
6392 break; | |
6393 | |
6394 case GE: | |
6395 strcpy (string, "brgez"); | |
6396 break; | |
6397 | |
6398 case LT: | |
6399 strcpy (string, "brlz"); | |
6400 break; | |
6401 | |
6402 case LE: | |
6403 strcpy (string, "brlez"); | |
6404 break; | |
6405 | |
6406 case GT: | |
6407 strcpy (string, "brgz"); | |
6408 break; | |
6409 | |
6410 default: | |
6411 gcc_unreachable (); | |
6412 } | |
6413 | |
6414 p = strchr (string, '\0'); | |
6415 | |
6416 /* Now add the annulling, reg, label, and nop. */ | |
6417 if (annul && ! far) | |
6418 { | |
6419 strcpy (p, ",a"); | |
6420 p += 2; | |
6421 } | |
6422 | |
6423 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) | |
6424 { | |
6425 strcpy (p, | |
6426 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) | |
6427 ? ",pt" : ",pn"); | |
6428 p += 3; | |
6429 } | |
6430 | |
6431 *p = p < string + 8 ? '\t' : ' '; | |
6432 p++; | |
6433 *p++ = '%'; | |
6434 *p++ = '0' + reg; | |
6435 *p++ = ','; | |
6436 *p++ = ' '; | |
6437 if (far) | |
6438 { | |
6439 int veryfar = 1, delta; | |
6440 | |
6441 if (INSN_ADDRESSES_SET_P ()) | |
6442 { | |
6443 delta = (INSN_ADDRESSES (INSN_UID (dest)) | |
6444 - INSN_ADDRESSES (INSN_UID (insn))); | |
6445 /* Leave some instructions for "slop". */ | |
6446 if (delta >= -260000 && delta < 260000) | |
6447 veryfar = 0; | |
6448 } | |
6449 | |
6450 strcpy (p, ".+12\n\t nop\n\t"); | |
6451 /* Skip the next insn if requested or | |
6452 if we know that it will be a nop. */ | |
6453 if (annul || ! final_sequence) | |
6454 p[3] = '6'; | |
6455 p += 12; | |
6456 if (veryfar) | |
6457 { | |
6458 strcpy (p, "b\t"); | |
6459 p += 2; | |
6460 } | |
6461 else | |
6462 { | |
6463 strcpy (p, "ba,pt\t%%xcc, "); | |
6464 p += 13; | |
6465 } | |
6466 } | |
6467 *p++ = '%'; | |
6468 *p++ = 'l'; | |
6469 *p++ = '0' + label; | |
6470 *p++ = '%'; | |
6471 *p++ = '#'; | |
6472 *p = '\0'; | |
6473 | |
6474 return string; | |
6475 } | |
6476 | |
6477 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7]. | |
6478 Such instructions cannot be used in the delay slot of a return insn on v9. | |
6479 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. | |
6480 */ | |
6481 | |
6482 static int | |
6483 epilogue_renumber (register rtx *where, int test) | |
6484 { | |
6485 register const char *fmt; | |
6486 register int i; | |
6487 register enum rtx_code code; | |
6488 | |
6489 if (*where == 0) | |
6490 return 0; | |
6491 | |
6492 code = GET_CODE (*where); | |
6493 | |
6494 switch (code) | |
6495 { | |
6496 case REG: | |
6497 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */ | |
6498 return 1; | |
6499 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32) | |
6500 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where))); | |
6501 case SCRATCH: | |
6502 case CC0: | |
6503 case PC: | |
6504 case CONST_INT: | |
6505 case CONST_DOUBLE: | |
6506 return 0; | |
6507 | |
6508 /* Do not replace the frame pointer with the stack pointer because | |
6509 it can cause the delayed instruction to load below the stack. | |
6510 This occurs when instructions like: | |
6511 | |
6512 (set (reg/i:SI 24 %i0) | |
6513 (mem/f:SI (plus:SI (reg/f:SI 30 %fp) | |
6514 (const_int -20 [0xffffffec])) 0)) | |
6515 | |
6516 are in the return delayed slot. */ | |
6517 case PLUS: | |
6518 if (GET_CODE (XEXP (*where, 0)) == REG | |
6519 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM | |
6520 && (GET_CODE (XEXP (*where, 1)) != CONST_INT | |
6521 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS)) | |
6522 return 1; | |
6523 break; | |
6524 | |
6525 case MEM: | |
6526 if (SPARC_STACK_BIAS | |
6527 && GET_CODE (XEXP (*where, 0)) == REG | |
6528 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM) | |
6529 return 1; | |
6530 break; | |
6531 | |
6532 default: | |
6533 break; | |
6534 } | |
6535 | |
6536 fmt = GET_RTX_FORMAT (code); | |
6537 | |
6538 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
6539 { | |
6540 if (fmt[i] == 'E') | |
6541 { | |
6542 register int j; | |
6543 for (j = XVECLEN (*where, i) - 1; j >= 0; j--) | |
6544 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test)) | |
6545 return 1; | |
6546 } | |
6547 else if (fmt[i] == 'e' | |
6548 && epilogue_renumber (&(XEXP (*where, i)), test)) | |
6549 return 1; | |
6550 } | |
6551 return 0; | |
6552 } | |
6553 | |
6554 /* Leaf functions and non-leaf functions have different needs. */ | |
6555 | |
6556 static const int | |
6557 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER; | |
6558 | |
6559 static const int | |
6560 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER; | |
6561 | |
6562 static const int *const reg_alloc_orders[] = { | |
6563 reg_leaf_alloc_order, | |
6564 reg_nonleaf_alloc_order}; | |
6565 | |
6566 void | |
6567 order_regs_for_local_alloc (void) | |
6568 { | |
6569 static int last_order_nonleaf = 1; | |
6570 | |
6571 if (df_regs_ever_live_p (15) != last_order_nonleaf) | |
6572 { | |
6573 last_order_nonleaf = !last_order_nonleaf; | |
6574 memcpy ((char *) reg_alloc_order, | |
6575 (const char *) reg_alloc_orders[last_order_nonleaf], | |
6576 FIRST_PSEUDO_REGISTER * sizeof (int)); | |
6577 } | |
6578 } | |
6579 | |
6580 /* Return 1 if REG and MEM are legitimate enough to allow the various | |
6581 mem<-->reg splits to be run. */ | |
6582 | |
6583 int | |
6584 sparc_splitdi_legitimate (rtx reg, rtx mem) | |
6585 { | |
6586 /* Punt if we are here by mistake. */ | |
6587 gcc_assert (reload_completed); | |
6588 | |
6589 /* We must have an offsettable memory reference. */ | |
6590 if (! offsettable_memref_p (mem)) | |
6591 return 0; | |
6592 | |
6593 /* If we have legitimate args for ldd/std, we do not want | |
6594 the split to happen. */ | |
6595 if ((REGNO (reg) % 2) == 0 | |
6596 && mem_min_alignment (mem, 8)) | |
6597 return 0; | |
6598 | |
6599 /* Success. */ | |
6600 return 1; | |
6601 } | |
6602 | |
6603 /* Return 1 if x and y are some kind of REG and they refer to | |
6604 different hard registers. This test is guaranteed to be | |
6605 run after reload. */ | |
6606 | |
6607 int | |
6608 sparc_absnegfloat_split_legitimate (rtx x, rtx y) | |
6609 { | |
6610 if (GET_CODE (x) != REG) | |
6611 return 0; | |
6612 if (GET_CODE (y) != REG) | |
6613 return 0; | |
6614 if (REGNO (x) == REGNO (y)) | |
6615 return 0; | |
6616 return 1; | |
6617 } | |
6618 | |
6619 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. | |
6620 This makes them candidates for using ldd and std insns. | |
6621 | |
6622 Note reg1 and reg2 *must* be hard registers. */ | |
6623 | |
6624 int | |
6625 registers_ok_for_ldd_peep (rtx reg1, rtx reg2) | |
6626 { | |
6627 /* We might have been passed a SUBREG. */ | |
6628 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) | |
6629 return 0; | |
6630 | |
6631 if (REGNO (reg1) % 2 != 0) | |
6632 return 0; | |
6633 | |
6634 /* Integer ldd is deprecated in SPARC V9. */ | |
6635 if (TARGET_V9 && REGNO (reg1) < 32) | |
6636 return 0; | |
6637 | |
6638 return (REGNO (reg1) == REGNO (reg2) - 1); | |
6639 } | |
6640 | |
6641 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in | |
6642 an ldd or std insn. | |
6643 | |
6644 This can only happen when addr1 and addr2, the addresses in mem1 | |
6645 and mem2, are consecutive memory locations (addr1 + 4 == addr2). | |
6646 addr1 must also be aligned on a 64-bit boundary. | |
6647 | |
6648 Also, if dependent_reg_rtx is not null, it should not be used to | |
6649 compute the address for mem1, i.e. we cannot optimize a sequence | |
6650 like: | |
6651 ld [%o0], %o0 | |
6652 ld [%o0 + 4], %o1 | |
6653 to | |
6654 ldd [%o0], %o0 | |
6655 nor: | |
6656 ld [%g3 + 4], %g3 | |
6657 ld [%g3], %g2 | |
6658 to | |
6659 ldd [%g3], %g2 | |
6660 | |
6661 But, note that the transformation from: | |
6662 ld [%g2 + 4], %g3 | |
6663 ld [%g2], %g2 | |
6664 to | |
6665 ldd [%g2], %g2 | |
6666 is perfectly fine. Thus, the peephole2 patterns always pass us | |
6667 the destination register of the first load, never the second one. | |
6668 | |
6669 For stores we don't have a similar problem, so dependent_reg_rtx is | |
6670 NULL_RTX. */ | |
6671 | |
6672 int | |
6673 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) | |
6674 { | |
6675 rtx addr1, addr2; | |
6676 unsigned int reg1; | |
6677 HOST_WIDE_INT offset1; | |
6678 | |
6679 /* The mems cannot be volatile. */ | |
6680 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) | |
6681 return 0; | |
6682 | |
6683 /* MEM1 should be aligned on a 64-bit boundary. */ | |
6684 if (MEM_ALIGN (mem1) < 64) | |
6685 return 0; | |
6686 | |
6687 addr1 = XEXP (mem1, 0); | |
6688 addr2 = XEXP (mem2, 0); | |
6689 | |
6690 /* Extract a register number and offset (if used) from the first addr. */ | |
6691 if (GET_CODE (addr1) == PLUS) | |
6692 { | |
6693 /* If not a REG, return zero. */ | |
6694 if (GET_CODE (XEXP (addr1, 0)) != REG) | |
6695 return 0; | |
6696 else | |
6697 { | |
6698 reg1 = REGNO (XEXP (addr1, 0)); | |
6699 /* The offset must be constant! */ | |
6700 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) | |
6701 return 0; | |
6702 offset1 = INTVAL (XEXP (addr1, 1)); | |
6703 } | |
6704 } | |
6705 else if (GET_CODE (addr1) != REG) | |
6706 return 0; | |
6707 else | |
6708 { | |
6709 reg1 = REGNO (addr1); | |
6710 /* This was a simple (mem (reg)) expression. Offset is 0. */ | |
6711 offset1 = 0; | |
6712 } | |
6713 | |
6714 /* Make sure the second address is a (mem (plus (reg) (const_int))). */ | |
6715 if (GET_CODE (addr2) != PLUS) | |
6716 return 0; | |
6717 | |
6718 if (GET_CODE (XEXP (addr2, 0)) != REG | |
6719 || GET_CODE (XEXP (addr2, 1)) != CONST_INT) | |
6720 return 0; | |
6721 | |
6722 if (reg1 != REGNO (XEXP (addr2, 0))) | |
6723 return 0; | |
6724 | |
6725 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx)) | |
6726 return 0; | |
6727 | |
6728 /* The first offset must be evenly divisible by 8 to ensure the | |
6729 address is 64-bit aligned. */ | |
6730 if (offset1 % 8 != 0) | |
6731 return 0; | |
6732 | |
6733 /* The offset for the second addr must be 4 more than the first addr. */ | |
6734 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) | |
6735 return 0; | |
6736 | |
6737 /* All the tests passed. addr1 and addr2 are valid for ldd and std | |
6738 instructions. */ | |
6739 return 1; | |
6740 } | |
6741 | |
6742 /* Return 1 if reg is a pseudo, or is the first register in | |
6743 a hard register pair. This makes it suitable for use in | |
6744 ldd and std insns. */ | |
6745 | |
6746 int | |
6747 register_ok_for_ldd (rtx reg) | |
6748 { | |
6749 /* We might have been passed a SUBREG. */ | |
6750 if (!REG_P (reg)) | |
6751 return 0; | |
6752 | |
6753 if (REGNO (reg) < FIRST_PSEUDO_REGISTER) | |
6754 return (REGNO (reg) % 2 == 0); | |
6755 | |
6756 return 1; | |
6757 } | |
6758 | |
6759 /* Return 1 if OP is a memory whose address is known to be | |
6760 aligned to an 8-byte boundary, or a pseudo during reload. | |
6761 This makes it suitable for use in ldd and std insns. */ | |
6762 | |
6763 int | |
6764 memory_ok_for_ldd (rtx op) | |
6765 { | |
6766 if (MEM_P (op)) | |
6767 { | |
6768 /* In 64-bit mode, we assume that the address is word-aligned. */ | |
6769 if (TARGET_ARCH32 && !mem_min_alignment (op, 8)) | |
6770 return 0; | |
6771 | |
6772 if ((reload_in_progress || reload_completed) | |
6773 && !strict_memory_address_p (Pmode, XEXP (op, 0))) | |
6774 return 0; | |
6775 } | |
6776 else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) | |
6777 { | |
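      /* A pseudo that reload has not assigned to a hard register will be
         replaced by its stack slot, which is acceptable here.  */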
6778 if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0)) | |
6779 return 0; | |
6780 } | |
6781 else | |
6782 return 0; | |
6783 | |
6784 return 1; | |
6785 } | |
6786 | |
6787 /* Print operand X (an rtx) in assembler syntax to file FILE. | |
6788 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. | |
6789 For `%' followed by punctuation, CODE is the punctuation and X is null. */ | |
6790 | |
6791 void | |
6792 print_operand (FILE *file, rtx x, int code) | |
6793 { | |
6794 switch (code) | |
6795 { | |
6796 case '#': | |
6797 /* Output an insn in a delay slot. */ | |
6798 if (final_sequence) | |
6799 sparc_indent_opcode = 1; | |
6800 else | |
6801 fputs ("\n\t nop", file); | |
6802 return; | |
6803 case '*': | |
6804 /* Output an annul flag if there's nothing for the delay slot and we | |
6805 are optimizing. This is always used with '(' below. | |
6806 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; | |
6807 this is a dbx bug. So, we only do this when optimizing. | |
6808 On UltraSPARC, a branch in a delay slot causes a pipeline flush. | |
6809 Always emit a nop in case the next instruction is a branch. */ | |
6810 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) | |
6811 fputs (",a", file); | |
6812 return; | |
6813 case '(': | |
6814 /* Output a 'nop' if there's nothing for the delay slot and we are | |
6815 not optimizing. This is always used with '*' above. */ | |
6816 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9)) | |
6817 fputs ("\n\t nop", file); | |
6818 else if (final_sequence) | |
6819 sparc_indent_opcode = 1; | |
6820 return; | |
6821 case ')': | |
6822 /* Output the right displacement from the saved PC on function return. | |
6823 The caller may have placed an "unimp" insn immediately after the call | |
6824 so we have to account for it. This insn is used in the 32-bit ABI | |
6825 when calling a function that returns a non zero-sized structure. The | |
6826 64-bit ABI doesn't have it. Be careful to have this test be the same | |
6827 as that used on the call. The exception here is that when | |
6828 sparc_std_struct_return is enabled, the psABI is followed exactly | |
6829 and the adjustment is made by the code in sparc_struct_value_rtx. | |
6830 The call emitted is the same when sparc_std_struct_return is | |
6831 present. */ | |
6832 if (! TARGET_ARCH64 | |
6833 && cfun->returns_struct | |
6834 && ! sparc_std_struct_return | |
6835 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) | |
6836 == INTEGER_CST) | |
6837 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) | |
6838 fputs ("12", file); | |
6839 else | |
6840 fputc ('8', file); | |
6841 return; | |
6842 case '_': | |
6843 /* Output the Embedded Medium/Anywhere code model base register. */ | |
6844 fputs (EMBMEDANY_BASE_REG, file); | |
6845 return; | |
6846 case '&': | |
6847 /* Print some local dynamic TLS name. */ | |
6848 assemble_name (file, get_some_local_dynamic_name ()); | |
6849 return; | |
6850 | |
6851 case 'Y': | |
6852 /* Adjust the operand to take into account a RESTORE operation. */ | |
6853 if (GET_CODE (x) == CONST_INT) | |
6854 break; | |
6855 else if (GET_CODE (x) != REG) | |
6856 output_operand_lossage ("invalid %%Y operand"); | |
6857 else if (REGNO (x) < 8) | |
6858 fputs (reg_names[REGNO (x)], file); | |
6859 else if (REGNO (x) >= 24 && REGNO (x) < 32) | |
6860 fputs (reg_names[REGNO (x)-16], file); | |
6861 else | |
6862 output_operand_lossage ("invalid %%Y operand"); | |
6863 return; | |
6864 case 'L': | |
6865 /* Print out the low order register name of a register pair. */ | |
6866 if (WORDS_BIG_ENDIAN) | |
6867 fputs (reg_names[REGNO (x)+1], file); | |
6868 else | |
6869 fputs (reg_names[REGNO (x)], file); | |
6870 return; | |
6871 case 'H': | |
6872 /* Print out the high order register name of a register pair. */ | |
6873 if (WORDS_BIG_ENDIAN) | |
6874 fputs (reg_names[REGNO (x)], file); | |
6875 else | |
6876 fputs (reg_names[REGNO (x)+1], file); | |
6877 return; | |
6878 case 'R': | |
6879 /* Print out the second register name of a register pair or quad. | |
6880 I.e., R (%o0) => %o1. */ | |
6881 fputs (reg_names[REGNO (x)+1], file); | |
6882 return; | |
6883 case 'S': | |
6884 /* Print out the third register name of a register quad. | |
6885 I.e., S (%o0) => %o2. */ | |
6886 fputs (reg_names[REGNO (x)+2], file); | |
6887 return; | |
6888 case 'T': | |
6889 /* Print out the fourth register name of a register quad. | |
6890 I.e., T (%o0) => %o3. */ | |
6891 fputs (reg_names[REGNO (x)+3], file); | |
6892 return; | |
6893 case 'x': | |
6894 /* Print a condition code register. */ | |
6895 if (REGNO (x) == SPARC_ICC_REG) | |
6896 { | |
6897 /* We don't handle CC[X]_NOOVmode because they're not supposed | |
6898 to occur here. */ | |
6899 if (GET_MODE (x) == CCmode) | |
6900 fputs ("%icc", file); | |
6901 else if (GET_MODE (x) == CCXmode) | |
6902 fputs ("%xcc", file); | |
6903 else | |
6904 gcc_unreachable (); | |
6905 } | |
6906 else | |
6907 /* %fccN register */ | |
6908 fputs (reg_names[REGNO (x)], file); | |
6909 return; | |
6910 case 'm': | |
6911 /* Print the operand's address only. */ | |
6912 output_address (XEXP (x, 0)); | |
6913 return; | |
6914 case 'r': | |
6915 /* In this case we need a register. Use %g0 if the | |
6916 operand is const0_rtx. */ | |
6917 if (x == const0_rtx | |
6918 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x)))) | |
6919 { | |
6920 fputs ("%g0", file); | |
6921 return; | |
6922 } | |
6923 else | |
6924 break; | |
6925 | |
6926 case 'A': | |
6927 switch (GET_CODE (x)) | |
6928 { | |
6929 case IOR: fputs ("or", file); break; | |
6930 case AND: fputs ("and", file); break; | |
6931 case XOR: fputs ("xor", file); break; | |
6932 default: output_operand_lossage ("invalid %%A operand"); | |
6933 } | |
6934 return; | |
6935 | |
6936 case 'B': | |
6937 switch (GET_CODE (x)) | |
6938 { | |
6939 case IOR: fputs ("orn", file); break; | |
6940 case AND: fputs ("andn", file); break; | |
6941 case XOR: fputs ("xnor", file); break; | |
6942 default: output_operand_lossage ("invalid %%B operand"); | |
6943 } | |
6944 return; | |
6945 | |
6946 /* These are used by the conditional move instructions. */ | |
6947 case 'c' : | |
6948 case 'C': | |
6949 { | |
6950 enum rtx_code rc = GET_CODE (x); | |
6951 | |
6952 if (code == 'c') | |
6953 { | |
6954 enum machine_mode mode = GET_MODE (XEXP (x, 0)); | |
6955 if (mode == CCFPmode || mode == CCFPEmode) | |
6956 rc = reverse_condition_maybe_unordered (GET_CODE (x)); | |
6957 else | |
6958 rc = reverse_condition (GET_CODE (x)); | |
6959 } | |
6960 switch (rc) | |
6961 { | |
6962 case NE: fputs ("ne", file); break; | |
6963 case EQ: fputs ("e", file); break; | |
6964 case GE: fputs ("ge", file); break; | |
6965 case GT: fputs ("g", file); break; | |
6966 case LE: fputs ("le", file); break; | |
6967 case LT: fputs ("l", file); break; | |
6968 case GEU: fputs ("geu", file); break; | |
6969 case GTU: fputs ("gu", file); break; | |
6970 case LEU: fputs ("leu", file); break; | |
6971 case LTU: fputs ("lu", file); break; | |
6972 case LTGT: fputs ("lg", file); break; | |
6973 case UNORDERED: fputs ("u", file); break; | |
6974 case ORDERED: fputs ("o", file); break; | |
6975 case UNLT: fputs ("ul", file); break; | |
6976 case UNLE: fputs ("ule", file); break; | |
6977 case UNGT: fputs ("ug", file); break; | |
6978 case UNGE: fputs ("uge", file); break; | |
6979 case UNEQ: fputs ("ue", file); break; | |
6980 default: output_operand_lossage (code == 'c' | |
6981 ? "invalid %%c operand" | |
6982 : "invalid %%C operand"); | |
6983 } | |
6984 return; | |
6985 } | |
6986 | |
6987 /* These are used by the movr instruction pattern. */ | |
6988 case 'd': | |
6989 case 'D': | |
6990 { | |
6991 enum rtx_code rc = (code == 'd' | |
6992 ? reverse_condition (GET_CODE (x)) | |
6993 : GET_CODE (x)); | |
6994 switch (rc) | |
6995 { | |
6996 case NE: fputs ("ne", file); break; | |
6997 case EQ: fputs ("e", file); break; | |
6998 case GE: fputs ("gez", file); break; | |
6999 case LT: fputs ("lz", file); break; | |
7000 case LE: fputs ("lez", file); break; | |
7001 case GT: fputs ("gz", file); break; | |
7002 default: output_operand_lossage (code == 'd' | |
7003 ? "invalid %%d operand" | |
7004 : "invalid %%D operand"); | |
7005 } | |
7006 return; | |
7007 } | |
7008 | |
7009 case 'b': | |
7010 { | |
7011 /* Print a sign-extended character. */ | |
7012 int i = trunc_int_for_mode (INTVAL (x), QImode); | |
7013 fprintf (file, "%d", i); | |
7014 return; | |
7015 } | |
7016 | |
7017 case 'f': | |
7018 /* Operand must be a MEM; write its address. */ | |
7019 if (GET_CODE (x) != MEM) | |
7020 output_operand_lossage ("invalid %%f operand"); | |
7021 output_address (XEXP (x, 0)); | |
7022 return; | |
7023 | |
7024 case 's': | |
7025 { | |
7026 /* Print a sign-extended 32-bit value. */ | |
7027 HOST_WIDE_INT i; | |
7028 if (GET_CODE(x) == CONST_INT) | |
7029 i = INTVAL (x); | |
7030 else if (GET_CODE(x) == CONST_DOUBLE) | |
7031 i = CONST_DOUBLE_LOW (x); | |
7032 else | |
7033 { | |
7034 output_operand_lossage ("invalid %%s operand"); | |
7035 return; | |
7036 } | |
7037 i = trunc_int_for_mode (i, SImode); | |
7038 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i); | |
7039 return; | |
7040 } | |
7041 | |
7042 case 0: | |
7043 /* Do nothing special. */ | |
7044 break; | |
7045 | |
7046 default: | |
7047 /* Undocumented flag. */ | |
7048 output_operand_lossage ("invalid operand output code"); | |
7049 } | |
7050 | |
7051 if (GET_CODE (x) == REG) | |
7052 fputs (reg_names[REGNO (x)], file); | |
7053 else if (GET_CODE (x) == MEM) | |
7054 { | |
7055 fputc ('[', file); | |
7056 /* Poor Sun assembler doesn't understand absolute addressing. */ | |
7057 if (CONSTANT_P (XEXP (x, 0))) | |
7058 fputs ("%g0+", file); | |
7059 output_address (XEXP (x, 0)); | |
7060 fputc (']', file); | |
7061 } | |
7062 else if (GET_CODE (x) == HIGH) | |
7063 { | |
7064 fputs ("%hi(", file); | |
7065 output_addr_const (file, XEXP (x, 0)); | |
7066 fputc (')', file); | |
7067 } | |
7068 else if (GET_CODE (x) == LO_SUM) | |
7069 { | |
7070 print_operand (file, XEXP (x, 0), 0); | |
7071 if (TARGET_CM_MEDMID) | |
7072 fputs ("+%l44(", file); | |
7073 else | |
7074 fputs ("+%lo(", file); | |
7075 output_addr_const (file, XEXP (x, 1)); | |
7076 fputc (')', file); | |
7077 } | |
7078 else if (GET_CODE (x) == CONST_DOUBLE | |
7079 && (GET_MODE (x) == VOIDmode | |
7080 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)) | |
7081 { | |
7082 if (CONST_DOUBLE_HIGH (x) == 0) | |
7083 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x)); | |
7084 else if (CONST_DOUBLE_HIGH (x) == -1 | |
7085 && CONST_DOUBLE_LOW (x) < 0) | |
7086 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x)); | |
7087 else | |
7088 output_operand_lossage ("long long constant not a valid immediate operand"); | |
7089 } | |
7090 else if (GET_CODE (x) == CONST_DOUBLE) | |
7091 output_operand_lossage ("floating point constant not a valid immediate operand"); | |
7092 else { output_addr_const (file, x); } | |
7093 } | |
7094 | |
7095 /* Target hook for assembling integer objects. The sparc version has | |
7096 special handling for aligned DI-mode objects. */ | |
7097 | |
7098 static bool | |
7099 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) | |
7100 { | |
7101 /* ??? We only output .xword's for symbols and only then in environments | |
7102 where the assembler can handle them. */ | |
7103 if (aligned_p && size == 8 | |
7104 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE)) | |
7105 { | |
7106 if (TARGET_V9) | |
7107 { | |
7108 assemble_integer_with_op ("\t.xword\t", x); | |
7109 return true; | |
7110 } | |
7111 else | |
7112 { | |
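          /* No .xword available: emit a zero high word followed by the
             32-bit value.  */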
7113 assemble_aligned_integer (4, const0_rtx); | |
7114 assemble_aligned_integer (4, x); | |
7115 return true; | |
7116 } | |
7117 } | |
7118 return default_assemble_integer (x, size, aligned_p); | |
7119 } | |
7120 | |
7121 /* Return the value of a code used in the .proc pseudo-op that says | |
7122 what kind of result this function returns. For non-C types, we pick | |
7123 the closest C type. */ | |
7124 | |
7125 #ifndef SHORT_TYPE_SIZE | |
7126 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) | |
7127 #endif | |
7128 | |
7129 #ifndef INT_TYPE_SIZE | |
7130 #define INT_TYPE_SIZE BITS_PER_WORD | |
7131 #endif | |
7132 | |
7133 #ifndef LONG_TYPE_SIZE | |
7134 #define LONG_TYPE_SIZE BITS_PER_WORD | |
7135 #endif | |
7136 | |
7137 #ifndef LONG_LONG_TYPE_SIZE | |
7138 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) | |
7139 #endif | |
7140 | |
7141 #ifndef FLOAT_TYPE_SIZE | |
7142 #define FLOAT_TYPE_SIZE BITS_PER_WORD | |
7143 #endif | |
7144 | |
7145 #ifndef DOUBLE_TYPE_SIZE | |
7146 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) | |
7147 #endif | |
7148 | |
7149 #ifndef LONG_DOUBLE_TYPE_SIZE | |
7150 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) | |
7151 #endif | |
7152 | |
7153 unsigned long | |
7154 sparc_type_code (register tree type) | |
7155 { | |
7156 register unsigned long qualifiers = 0; | |
7157 register unsigned shift; | |
7158 | |
7159 /* Only the first 30 bits of the qualifier are valid. We must refrain from | |
7160 setting more, since some assemblers will give an error for this. Also, | |
7161 we must be careful to avoid shifts of 32 bits or more to avoid getting | |
7162 unpredictable results. */ | |
7163 | |
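  /* Each level of type derivation (pointer, function, array) consumes two
     bits starting at bit 6; the fundamental type code returned below lives
     in the low-order bits.  */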
7164 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) | |
7165 { | |
7166 switch (TREE_CODE (type)) | |
7167 { | |
7168 case ERROR_MARK: | |
7169 return qualifiers; | |
7170 | |
7171 case ARRAY_TYPE: | |
7172 qualifiers |= (3 << shift); | |
7173 break; | |
7174 | |
7175 case FUNCTION_TYPE: | |
7176 case METHOD_TYPE: | |
7177 qualifiers |= (2 << shift); | |
7178 break; | |
7179 | |
7180 case POINTER_TYPE: | |
7181 case REFERENCE_TYPE: | |
7182 case OFFSET_TYPE: | |
7183 qualifiers |= (1 << shift); | |
7184 break; | |
7185 | |
7186 case RECORD_TYPE: | |
7187 return (qualifiers | 8); | |
7188 | |
7189 case UNION_TYPE: | |
7190 case QUAL_UNION_TYPE: | |
7191 return (qualifiers | 9); | |
7192 | |
7193 case ENUMERAL_TYPE: | |
7194 return (qualifiers | 10); | |
7195 | |
7196 case VOID_TYPE: | |
7197 return (qualifiers | 16); | |
7198 | |
7199 case INTEGER_TYPE: | |
7200 /* If this is a range type, consider it to be the underlying | |
7201 type. */ | |
7202 if (TREE_TYPE (type) != 0) | |
7203 break; | |
7204 | |
7205 /* Carefully distinguish all the standard types of C, | |
7206 without messing up if the language is not C. We do this by | |
7207 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to | |
7208 look at both the names and the above fields, but that's redundant. | |
7209 Any type whose size is between two C types will be considered | |
7210 to be the wider of the two types. Also, we do not have a | |
7211 special code to use for "long long", so anything wider than | |
7212 long is treated the same. Note that we can't distinguish | |
7213 between "int" and "long" in this code if they are the same | |
7214 size, but that's fine, since neither can the assembler. */ | |
7215 | |
7216 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) | |
7217 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); | |
7218 | |
7219 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) | |
7220 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); | |
7221 | |
7222 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) | |
7223 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); | |
7224 | |
7225 else | |
7226 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); | |
7227 | |
7228 case REAL_TYPE: | |
7229 /* If this is a range type, consider it to be the underlying | |
7230 type. */ | |
7231 if (TREE_TYPE (type) != 0) | |
7232 break; | |
7233 | |
7234 /* Carefully distinguish all the standard types of C, | |
7235 without messing up if the language is not C. */ | |
7236 | |
7237 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) | |
7238 return (qualifiers | 6); | |
7239 | |
7240 else | |
7241 return (qualifiers | 7); | |
7242 | |
7243 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ | |
7244 /* ??? We need to distinguish between double and float complex types, | |
7245 but I don't know how yet because I can't reach this code from | |
7246 existing front-ends. */ | |
7247 return (qualifiers | 7); /* Who knows? */ | |
7248 | |
7249 case VECTOR_TYPE: | |
7250 case BOOLEAN_TYPE: /* Boolean truth value type. */ | |
7251 case LANG_TYPE: /* ? */ | |
7252 return qualifiers; | |
7253 | |
7254 default: | |
7255 gcc_unreachable (); /* Not a type! */ | |
7256 } | |
7257 } | |
7258 | |
7259 return qualifiers; | |
7260 } | |
7261 | |
7262 /* Nested function support. */ | |
7263 | |
7264 /* Emit RTL insns to initialize the variable parts of a trampoline. | |
7265 FNADDR is an RTX for the address of the function's pure code. | |
7266 CXT is an RTX for the static chain value for the function. | |
7267 | |
7268 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi | |
7269 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes | |
7270 (to store insns). This is a bit excessive. Perhaps a different | |
7271 mechanism would be better here. | |
7272 | |
7273 Emit enough FLUSH insns to synchronize the data and instruction caches. */ | |
7274 | |
7275 void | |
7276 sparc_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) | |
7277 { | |
7278 /* SPARC 32-bit trampoline: | |
7279 | |
7280 sethi %hi(fn), %g1 | |
7281 sethi %hi(static), %g2 | |
7282 jmp %g1+%lo(fn) | |
7283 or %g2, %lo(static), %g2 | |
7284 | |
7285 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii | |
7286 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii | |
7287 */ | |
7288 | |
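  /* Per the SETHI and JMPL encodings above: 0x03000000 is "sethi %hi(0), %g1"
     and 0x05000000 is "sethi %hi(0), %g2", whose imm22 fields take the top
     22 bits of FNADDR and CXT; 0x81c06000 is "jmpl %g1+0, %g0" and 0x8410a000
     is "or %g2, 0, %g2", whose simm13 fields take the low 10 bits.  */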
7289 emit_move_insn | |
7290 (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), | |
7291 expand_binop (SImode, ior_optab, | |
7292 expand_shift (RSHIFT_EXPR, SImode, fnaddr, | |
7293 size_int (10), 0, 1), | |
7294 GEN_INT (trunc_int_for_mode (0x03000000, SImode)), | |
7295 NULL_RTX, 1, OPTAB_DIRECT)); | |
7296 | |
7297 emit_move_insn | |
7298 (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), | |
7299 expand_binop (SImode, ior_optab, | |
7300 expand_shift (RSHIFT_EXPR, SImode, cxt, | |
7301 size_int (10), 0, 1), | |
7302 GEN_INT (trunc_int_for_mode (0x05000000, SImode)), | |
7303 NULL_RTX, 1, OPTAB_DIRECT)); | |
7304 | |
7305 emit_move_insn | |
7306 (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), | |
7307 expand_binop (SImode, ior_optab, | |
7308 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), | |
7309 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), | |
7310 NULL_RTX, 1, OPTAB_DIRECT)); | |
7311 | |
7312 emit_move_insn | |
7313 (gen_rtx_MEM (SImode, plus_constant (tramp, 12)), | |
7314 expand_binop (SImode, ior_optab, | |
7315 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), | |
7316 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), | |
7317 NULL_RTX, 1, OPTAB_DIRECT)); | |
7318 | |
7319 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is | |
7320 aligned on a 16-byte boundary so one flush clears it all. */ | |
7321 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp)))); | |
7322 if (sparc_cpu != PROCESSOR_ULTRASPARC | |
7323 && sparc_cpu != PROCESSOR_ULTRASPARC3 | |
7324 && sparc_cpu != PROCESSOR_NIAGARA | |
7325 && sparc_cpu != PROCESSOR_NIAGARA2) | |
7326 emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, | |
7327 plus_constant (tramp, 8))))); | |
7328 | |
7329 /* Call __enable_execute_stack after writing onto the stack to make sure | |
7330 the stack address is accessible. */ | |
7331 #ifdef ENABLE_EXECUTE_STACK | |
7332 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), | |
7333 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); | |
7334 #endif | |
7335 | |
7336 } | |
7337 | |
7338 /* The 64-bit version is simpler because it makes more sense to load the | |
7339 values as "immediate" data out of the trampoline. It's also easier since | |
7340 we can read the PC without clobbering a register. */ | |
7341 | |
7342 void | |
7343 sparc64_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) | |
7344 { | |
7345 /* SPARC 64-bit trampoline: | |
7346 | |
7347 rd %pc, %g1 | |
7348 ldx [%g1+24], %g5 | |
7349 jmp %g5 | |
7350 ldx [%g1+16], %g5 | |
7351 +16 bytes data | |
7352 */ | |
7353 | |
7354 emit_move_insn (gen_rtx_MEM (SImode, tramp), | |
7355 GEN_INT (trunc_int_for_mode (0x83414000, SImode))); | |
7356 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), | |
7357 GEN_INT (trunc_int_for_mode (0xca586018, SImode))); | |
7358 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), | |
7359 GEN_INT (trunc_int_for_mode (0x81c14000, SImode))); | |
7360 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)), | |
7361 GEN_INT (trunc_int_for_mode (0xca586010, SImode))); | |
7362 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt); | |
7363 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr); | |
7364 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp)))); | |
7365 | |
7366 if (sparc_cpu != PROCESSOR_ULTRASPARC | |
7367 && sparc_cpu != PROCESSOR_ULTRASPARC3 | |
7368 && sparc_cpu != PROCESSOR_NIAGARA | |
7369 && sparc_cpu != PROCESSOR_NIAGARA2) | |
7370 emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8))))); | |
7371 | |
7372 /* Call __enable_execute_stack after writing onto the stack to make sure | |
7373 the stack address is accessible. */ | |
7374 #ifdef ENABLE_EXECUTE_STACK | |
7375 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), | |
7376 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); | |
7377 #endif | |
7378 } | |
7379 | |
7380 /* Adjust the cost of a scheduling dependency. Return the new cost of | |
7381 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */ | |
7382 | |
7383 static int | |
7384 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) | |
7385 { | |
7386 enum attr_type insn_type; | |
7387 | |
7388 if (! recog_memoized (insn)) | |
7389 return 0; | |
7390 | |
7391 insn_type = get_attr_type (insn); | |
7392 | |
7393 if (REG_NOTE_KIND (link) == 0) | |
7394 { | |
7395 /* Data dependency; DEP_INSN writes a register that INSN reads some | |
7396 cycles later. */ | |
7397 | |
7398 /* If a load, then the dependence must be on the memory address; | |
7399 add an extra "cycle". Note that the cost could be two cycles | |
7400 if the reg was written late in an instruction group; we cannot tell | |
7401 here. */ | |
7402 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD) | |
7403 return cost + 3; | |
7404 | |
7405 /* Get the delay only if the address of the store is the dependence. */ | |
7406 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE) | |
7407 { | |
7408 rtx pat = PATTERN(insn); | |
7409 rtx dep_pat = PATTERN (dep_insn); | |
7410 | |
7411 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) | |
7412 return cost; /* This should not happen! */ | |
7413 | |
7414 /* The dependency between the two instructions was on the data that | |
7415 is being stored. Assume that this implies that the address of the | |
7416 store is not dependent. */ | |
7417 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) | |
7418 return cost; | |
7419 | |
7420 return cost + 3; /* An approximation. */ | |
7421 } | |
7422 | |
7423 /* A shift instruction cannot receive its data from an instruction | |
7424 in the same cycle; add a one cycle penalty. */ | |
7425 if (insn_type == TYPE_SHIFT) | |
7426 return cost + 3; /* Split before cascade into shift. */ | |
7427 } | |
7428 else | |
7429 { | |
7430 /* Anti- or output- dependency; DEP_INSN reads/writes a register that | |
7431 INSN writes some cycles later. */ | |
7432 | |
7433 /* These are only significant for the fpu unit; writing a fp reg before | |
7434 the fpu has finished with it stalls the processor. */ | |
7435 | |
7436 /* Reusing an integer register causes no problems. */ | |
7437 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) | |
7438 return 0; | |
7439 } | |
7440 | |
7441 return cost; | |
7442 } | |
7443 | |
7444 static int | |
7445 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) | |
7446 { | |
7447 enum attr_type insn_type, dep_type; | |
7448 rtx pat = PATTERN(insn); | |
7449 rtx dep_pat = PATTERN (dep_insn); | |
7450 | |
7451 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) | |
7452 return cost; | |
7453 | |
7454 insn_type = get_attr_type (insn); | |
7455 dep_type = get_attr_type (dep_insn); | |
7456 | |
7457 switch (REG_NOTE_KIND (link)) | |
7458 { | |
7459 case 0: | |
7460 /* Data dependency; DEP_INSN writes a register that INSN reads some | |
7461 cycles later. */ | |
7462 | |
7463 switch (insn_type) | |
7464 { | |
7465 case TYPE_STORE: | |
7466 case TYPE_FPSTORE: | |
7467 /* Get the delay iff the address of the store is the dependence. */ | |
7468 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) | |
7469 return cost; | |
7470 | |
7471 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) | |
7472 return cost; | |
7473 return cost + 3; | |
7474 | |
7475 case TYPE_LOAD: | |
7476 case TYPE_SLOAD: | |
7477 case TYPE_FPLOAD: | |
7478 /* If a load, then the dependence must be on the memory address. If | |
7479 the addresses aren't equal, then it might be a false dependency. */ | |
7480 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) | |
7481 { | |
7482 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET | |
7483 || GET_CODE (SET_DEST (dep_pat)) != MEM | |
7484 || GET_CODE (SET_SRC (pat)) != MEM | |
7485 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0), | |
7486 XEXP (SET_SRC (pat), 0))) | |
7487 return cost + 2; | |
7488 | |
7489 return cost + 8; | |
7490 } | |
7491 break; | |
7492 | |
7493 case TYPE_BRANCH: | |
7494 /* Compare to branch latency is 0. There is no benefit from | |
7495 separating compare and branch. */ | |
7496 if (dep_type == TYPE_COMPARE) | |
7497 return 0; | |
7498 /* Floating point compare to branch latency is less than | |
7499 compare to conditional move. */ | |
7500 if (dep_type == TYPE_FPCMP) | |
7501 return cost - 1; | |
7502 break; | |
7503 default: | |
7504 break; | |
7505 } | |
7506 break; | |
7507 | |
7508 case REG_DEP_ANTI: | |
7509 /* Anti-dependencies only penalize the fpu unit. */ | |
7510 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) | |
7511 return 0; | |
7512 break; | |
7513 | |
7514 default: | |
7515 break; | |
7516 } | |
7517 | |
7518 return cost; | |
7519 } | |
7520 | |
7521 static int | |
7522 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost) | |
7523 { | |
7524 switch (sparc_cpu) | |
7525 { | |
7526 case PROCESSOR_SUPERSPARC: | |
7527 cost = supersparc_adjust_cost (insn, link, dep, cost); | |
7528 break; | |
7529 case PROCESSOR_HYPERSPARC: | |
7530 case PROCESSOR_SPARCLITE86X: | |
7531 cost = hypersparc_adjust_cost (insn, link, dep, cost); | |
7532 break; | |
7533 default: | |
7534 break; | |
7535 } | |
7536 return cost; | |
7537 } | |
7538 | |
7539 static void | |
7540 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED, | |
7541 int sched_verbose ATTRIBUTE_UNUSED, | |
7542 int max_ready ATTRIBUTE_UNUSED) | |
7543 { | |
7544 } | |
7545 | |
7546 static int | |
7547 sparc_use_sched_lookahead (void) | |
7548 { | |
7549 if (sparc_cpu == PROCESSOR_NIAGARA | |
7550 || sparc_cpu == PROCESSOR_NIAGARA2) | |
7551 return 0; | |
7552 if (sparc_cpu == PROCESSOR_ULTRASPARC | |
7553 || sparc_cpu == PROCESSOR_ULTRASPARC3) | |
7554 return 4; | |
7555 if ((1 << sparc_cpu) & | |
7556 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) | | |
7557 (1 << PROCESSOR_SPARCLITE86X))) | |
7558 return 3; | |
7559 return 0; | |
7560 } | |
7561 | |
7562 static int | |
7563 sparc_issue_rate (void) | |
7564 { | |
7565 switch (sparc_cpu) | |
7566 { | |
7567 case PROCESSOR_NIAGARA: | |
7568 case PROCESSOR_NIAGARA2: | |
7569 default: | |
7570 return 1; | |
7571 case PROCESSOR_V9: | |
7572 /* Assume V9 processors are capable of at least dual-issue. */ | |
7573 return 2; | |
7574 case PROCESSOR_SUPERSPARC: | |
7575 return 3; | |
7576 case PROCESSOR_HYPERSPARC: | |
7577 case PROCESSOR_SPARCLITE86X: | |
7578 return 2; | |
7579 case PROCESSOR_ULTRASPARC: | |
7580 case PROCESSOR_ULTRASPARC3: | |
7581 return 4; | |
7582 } | |
7583 } | |
7584 | |
7585 static int | |
7586 set_extends (rtx insn) | |
7587 { | |
7588 register rtx pat = PATTERN (insn); | |
7589 | |
7590 switch (GET_CODE (SET_SRC (pat))) | |
7591 { | |
7592 /* Load and some shift instructions zero extend. */ | |
7593 case MEM: | |
7594 case ZERO_EXTEND: | |
7595 /* sethi clears the high bits. */ | |
7596 case HIGH: | |
7597 /* LO_SUM is used with sethi. sethi cleared the high | |
7598 bits and the values used with lo_sum are positive. */ | |
7599 case LO_SUM: | |
7600 /* Store flag stores 0 or 1. */ | |
7601 case LT: case LTU: | |
7602 case GT: case GTU: | |
7603 case LE: case LEU: | |
7604 case GE: case GEU: | |
7605 case EQ: | |
7606 case NE: | |
7607 return 1; | |
7608 case AND: | |
7609 { | |
7610 rtx op0 = XEXP (SET_SRC (pat), 0); | |
7611 rtx op1 = XEXP (SET_SRC (pat), 1); | |
7612 if (GET_CODE (op1) == CONST_INT) | |
7613 return INTVAL (op1) >= 0; | |
7614 if (GET_CODE (op0) != REG) | |
7615 return 0; | |
7616 if (sparc_check_64 (op0, insn) == 1) | |
7617 return 1; | |
7618 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); | |
7619 } | |
7620 case IOR: | |
7621 case XOR: | |
7622 { | |
7623 rtx op0 = XEXP (SET_SRC (pat), 0); | |
7624 rtx op1 = XEXP (SET_SRC (pat), 1); | |
7625 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0) | |
7626 return 0; | |
7627 if (GET_CODE (op1) == CONST_INT) | |
7628 return INTVAL (op1) >= 0; | |
7629 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); | |
7630 } | |
7631 case LSHIFTRT: | |
7632 return GET_MODE (SET_SRC (pat)) == SImode; | |
7633 /* Positive integers leave the high bits zero. */ | |
7634 case CONST_DOUBLE: | |
7635 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000); | |
7636 case CONST_INT: | |
7637 return ! (INTVAL (SET_SRC (pat)) & 0x80000000); | |
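      /* Arithmetic right shift and sign extension of an SImode value
         sign-extend; return -1 to match the sparc_check_64 convention.  */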
7638 case ASHIFTRT: | |
7639 case SIGN_EXTEND: | |
7640 return - (GET_MODE (SET_SRC (pat)) == SImode); | |
7641 case REG: | |
7642 return sparc_check_64 (SET_SRC (pat), insn); | |
7643 default: | |
7644 return 0; | |
7645 } | |
7646 } | |
7647 | |
7648 /* We _ought_ to have only one kind per function, but... */ | |
7649 static GTY(()) rtx sparc_addr_diff_list; | |
7650 static GTY(()) rtx sparc_addr_list; | |
7651 | |
7652 void | |
7653 sparc_defer_case_vector (rtx lab, rtx vec, int diff) | |
7654 { | |
7655 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); | |
7656 if (diff) | |
7657 sparc_addr_diff_list | |
7658 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); | |
7659 else | |
7660 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); | |
7661 } | |
7662 | |
7663 static void | |
7664 sparc_output_addr_vec (rtx vec) | |
7665 { | |
7666 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); | |
7667 int idx, vlen = XVECLEN (body, 0); | |
7668 | |
7669 #ifdef ASM_OUTPUT_ADDR_VEC_START | |
7670 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); | |
7671 #endif | |
7672 | |
7673 #ifdef ASM_OUTPUT_CASE_LABEL | |
7674 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), | |
7675 NEXT_INSN (lab)); | |
7676 #else | |
7677 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); | |
7678 #endif | |
7679 | |
7680 for (idx = 0; idx < vlen; idx++) | |
7681 { | |
7682 ASM_OUTPUT_ADDR_VEC_ELT | |
7683 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); | |
7684 } | |
7685 | |
7686 #ifdef ASM_OUTPUT_ADDR_VEC_END | |
7687 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); | |
7688 #endif | |
7689 } | |
7690 | |
7691 static void | |
7692 sparc_output_addr_diff_vec (rtx vec) | |
7693 { | |
7694 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); | |
7695 rtx base = XEXP (XEXP (body, 0), 0); | |
7696 int idx, vlen = XVECLEN (body, 1); | |
7697 | |
7698 #ifdef ASM_OUTPUT_ADDR_VEC_START | |
7699 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); | |
7700 #endif | |
7701 | |
7702 #ifdef ASM_OUTPUT_CASE_LABEL | |
7703 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), | |
7704 NEXT_INSN (lab)); | |
7705 #else | |
7706 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); | |
7707 #endif | |
7708 | |
7709 for (idx = 0; idx < vlen; idx++) | |
7710 { | |
7711 ASM_OUTPUT_ADDR_DIFF_ELT | |
7712 (asm_out_file, | |
7713 body, | |
7714 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), | |
7715 CODE_LABEL_NUMBER (base)); | |
7716 } | |
7717 | |
7718 #ifdef ASM_OUTPUT_ADDR_VEC_END | |
7719 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); | |
7720 #endif | |
7721 } | |
7722 | |
7723 static void | |
7724 sparc_output_deferred_case_vectors (void) | |
7725 { | |
7726 rtx t; | |
7727 int align; | |
7728 | |
7729 if (sparc_addr_list == NULL_RTX | |
7730 && sparc_addr_diff_list == NULL_RTX) | |
7731 return; | |
7732 | |
7733 /* Align to cache line in the function's code section. */ | |
7734 switch_to_section (current_function_section ()); | |
7735 | |
7736 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); | |
7737 if (align > 0) | |
7738 ASM_OUTPUT_ALIGN (asm_out_file, align); | |
7739 | |
7740 for (t = sparc_addr_list; t ; t = XEXP (t, 1)) | |
7741 sparc_output_addr_vec (XEXP (t, 0)); | |
7742 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) | |
7743 sparc_output_addr_diff_vec (XEXP (t, 0)); | |
7744 | |
7745 sparc_addr_list = sparc_addr_diff_list = NULL_RTX; | |
7746 } | |
7747 | |
7748 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are | |
7749 unknown. Return 1 if the high bits are zero, -1 if the register is | |
7750 sign extended. */ | |
7751 int | |
7752 sparc_check_64 (rtx x, rtx insn) | |
7753 { | |
7754 /* If a register is set only once it is safe to ignore insns this | |
7755 code does not know how to handle. The loop will either recognize | |
7756 the single set and return the correct value or fail to recognize | |
7757 it and return 0. */ | |
7758 int set_once = 0; | |
7759 rtx y = x; | |
7760 | |
7761 gcc_assert (GET_CODE (x) == REG); | |
7762 | |
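  /* For a DImode register, Y names the SImode hard register holding the low
     word; with big-endian word order that is the second register of the
     pair.  */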
7763 if (GET_MODE (x) == DImode) | |
7764 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); | |
7765 | |
7766 if (flag_expensive_optimizations | |
7767 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) | |
7768 set_once = 1; | |
7769 | |
7770 if (insn == 0) | |
7771 { | |
7772 if (set_once) | |
7773 insn = get_last_insn_anywhere (); | |
7774 else | |
7775 return 0; | |
7776 } | |
7777 | |
7778 while ((insn = PREV_INSN (insn))) | |
7779 { | |
7780 switch (GET_CODE (insn)) | |
7781 { | |
7782 case JUMP_INSN: | |
7783 case NOTE: | |
7784 break; | |
7785 case CODE_LABEL: | |
7786 case CALL_INSN: | |
7787 default: | |
7788 if (! set_once) | |
7789 return 0; | |
7790 break; | |
7791 case INSN: | |
7792 { | |
7793 rtx pat = PATTERN (insn); | |
7794 if (GET_CODE (pat) != SET) | |
7795 return 0; | |
7796 if (rtx_equal_p (x, SET_DEST (pat))) | |
7797 return set_extends (insn); | |
7798 if (y && rtx_equal_p (y, SET_DEST (pat))) | |
7799 return set_extends (insn); | |
7800 if (reg_overlap_mentioned_p (SET_DEST (pat), y)) | |
7801 return 0; | |
7802 } | |
7803 } | |
7804 } | |
7805 return 0; | |
7806 } | |
7807 | |
7808 /* Returns assembly code to perform a DImode shift using | |
7809 a 64-bit global or out register on SPARC-V8+. */ | |
7810 const char * | |
7811 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode) | |
7812 { | |
7813 static char asm_code[60]; | |
7814 | |
7815 /* The scratch register is only required when the destination | |
7816 register is not a 64-bit global or out register. */ | |
7817 if (which_alternative != 2) | |
7818 operands[3] = operands[0]; | |
7819 | |
7820 /* We can only shift by constants <= 63. */ | |
7821 if (GET_CODE (operands[2]) == CONST_INT) | |
7822 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); | |
7823 | |
7824 if (GET_CODE (operands[1]) == CONST_INT) | |
7825 { | |
7826 output_asm_insn ("mov\t%1, %3", operands); | |
7827 } | |
7828 else | |
7829 { | |
7830 output_asm_insn ("sllx\t%H1, 32, %3", operands); | |
7831 if (sparc_check_64 (operands[1], insn) <= 0) | |
7832 output_asm_insn ("srl\t%L1, 0, %L1", operands); | |
7833 output_asm_insn ("or\t%L1, %3, %3", operands); | |
7834 } | |
7835 | |
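  /* The 64-bit value is now assembled in %3; apply OPCODE to it and split
     the result back into the two 32-bit halves of operand 0.  */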
7836 strcpy(asm_code, opcode); | |
7837 | |
7838 if (which_alternative != 2) | |
7839 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); | |
7840 else | |
7841 return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); | |
7842 } | |
7843 | |
7844 /* Output rtl to increment the profiler label LABELNO | |
7845 for profiling a function entry. */ | |
7846 | |
7847 void | |
7848 sparc_profile_hook (int labelno) | |
7849 { | |
7850 char buf[32]; | |
7851 rtx lab, fun; | |
7852 | |
7853 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); | |
7854 if (NO_PROFILE_COUNTERS) | |
7855 { | |
7856 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0); | |
7857 } | |
7858 else | |
7859 { | |
7860 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); | |
7861 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); | |
7862 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode); | |
7863 } | |
7864 } | |
7865 | |
7866 #ifdef OBJECT_FORMAT_ELF | |
7867 static void | |
7868 sparc_elf_asm_named_section (const char *name, unsigned int flags, | |
7869 tree decl) | |
7870 { | |
7871 if (flags & SECTION_MERGE) | |
7872 { | |
7873 /* entsize cannot be expressed in this section attribute | |
7874 encoding style. */ | |
7875 default_elf_asm_named_section (name, flags, decl); | |
7876 return; | |
7877 } | |
7878 | |
7879 fprintf (asm_out_file, "\t.section\t\"%s\"", name); | |
7880 | |
7881 if (!(flags & SECTION_DEBUG)) | |
7882 fputs (",#alloc", asm_out_file); | |
7883 if (flags & SECTION_WRITE) | |
7884 fputs (",#write", asm_out_file); | |
7885 if (flags & SECTION_TLS) | |
7886 fputs (",#tls", asm_out_file); | |
7887 if (flags & SECTION_CODE) | |
7888 fputs (",#execinstr", asm_out_file); | |
7889 | |
7890 /* ??? Handle SECTION_BSS. */ | |
7891 | |
7892 fputc ('\n', asm_out_file); | |
7893 } | |
7894 #endif /* OBJECT_FORMAT_ELF */ | |
7895 | |
7896 /* We do not allow indirect calls to be optimized into sibling calls. | |
7897 | |
7898 We cannot use sibling calls when delayed branches are disabled | |
7899 because they will likely require the call delay slot to be filled. | |
7900 | |
7901 Also, on SPARC 32-bit we cannot emit a sibling call when the | |
7902 current function returns a structure. This is because the "unimp | |
7903 after call" convention would cause the callee to return to the | |
7904 wrong place. The generic code already disallows cases where the | |
7905 function being called returns a structure. | |
7906 | |
7907 It may seem strange how this last case could occur. Usually there | |
7908 is code after the call which jumps to epilogue code which dumps the | |
7909 return value into the struct return area. That ought to invalidate | |
7910 the sibling call, right? Well, in the C++ case we can end up passing | |
7911 the pointer to the struct return area to a constructor (which returns | |
7912 void) and then nothing else happens. Such a sibling call would look | |
7913 valid without the added check here. | |
7914 | |
7915 VxWorks PIC PLT entries require the global pointer to be initialized | |
7916 on entry. We therefore can't emit sibling calls to them. */ | |
7917 static bool | |
7918 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) | |
7919 { | |
7920 return (decl | |
7921 && flag_delayed_branch | |
7922 && (TARGET_ARCH64 || ! cfun->returns_struct) | |
7923 && !(TARGET_VXWORKS_RTP | |
7924 && flag_pic | |
7925 && !targetm.binds_local_p (decl))); | |
7926 } | |
7927 | |
7928 /* libfunc renaming. */ | |
7929 #include "config/gofast.h" | |
7930 | |
7931 static void | |
7932 sparc_init_libfuncs (void) | |
7933 { | |
7934 if (TARGET_ARCH32) | |
7935 { | |
7936 /* Use the subroutines that Sun's library provides for integer | |
7937 multiply and divide. The `*' prevents an underscore from | |
7938 being prepended by the compiler. .umul is a little faster | |
7939 than .mul. */ | |
7940 set_optab_libfunc (smul_optab, SImode, "*.umul"); | |
7941 set_optab_libfunc (sdiv_optab, SImode, "*.div"); | |
7942 set_optab_libfunc (udiv_optab, SImode, "*.udiv"); | |
7943 set_optab_libfunc (smod_optab, SImode, "*.rem"); | |
7944 set_optab_libfunc (umod_optab, SImode, "*.urem"); | |
7945 | |
7946 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */ | |
7947 set_optab_libfunc (add_optab, TFmode, "_Q_add"); | |
7948 set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); | |
7949 set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); | |
7950 set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); | |
7951 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); | |
7952 | |
7953 /* We can define the TFmode sqrt optab only if TARGET_FPU. This | |
7954 is because with soft-float, the SFmode and DFmode sqrt | |
7955 instructions will be absent, and the compiler will notice and | |
7956 try to use the TFmode sqrt instruction for calls to the | |
7957 builtin function sqrt, but this fails. */ | |
7958 if (TARGET_FPU) | |
7959 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); | |
7960 | |
7961 set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); | |
7962 set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); | |
7963 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); | |
7964 set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); | |
7965 set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); | |
7966 set_optab_libfunc (le_optab, TFmode, "_Q_fle"); | |
7967 | |
7968 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); | |
7969 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); | |
7970 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); | |
7971 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); | |
7972 | |
7973 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); | |
7974 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); | |
7975 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); | |
7976 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); | |
7977 | |
7978 if (DITF_CONVERSION_LIBFUNCS) | |
7979 { | |
7980 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); | |
7981 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); | |
7982 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); | |
7983 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); | |
7984 } | |
7985 | |
7986 if (SUN_CONVERSION_LIBFUNCS) | |
7987 { | |
7988 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); | |
7989 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); | |
7990 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); | |
7991 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); | |
7992 } | |
7993 } | |
7994 if (TARGET_ARCH64) | |
7995 { | |
7996 /* In the SPARC 64-bit ABI, SImode multiply and divide functions | |
7997 do not exist in the library. Make sure the compiler does not | |
7998 emit calls to them by accident. (It should always use the | |
7999 hardware instructions.) */ | |
8000 set_optab_libfunc (smul_optab, SImode, 0); | |
8001 set_optab_libfunc (sdiv_optab, SImode, 0); | |
8002 set_optab_libfunc (udiv_optab, SImode, 0); | |
8003 set_optab_libfunc (smod_optab, SImode, 0); | |
8004 set_optab_libfunc (umod_optab, SImode, 0); | |
8005 | |
8006 if (SUN_INTEGER_MULTIPLY_64) | |
8007 { | |
8008 set_optab_libfunc (smul_optab, DImode, "__mul64"); | |
8009 set_optab_libfunc (sdiv_optab, DImode, "__div64"); | |
8010 set_optab_libfunc (udiv_optab, DImode, "__udiv64"); | |
8011 set_optab_libfunc (smod_optab, DImode, "__rem64"); | |
8012 set_optab_libfunc (umod_optab, DImode, "__urem64"); | |
8013 } | |
8014 | |
8015 if (SUN_CONVERSION_LIBFUNCS) | |
8016 { | |
8017 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); | |
8018 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); | |
8019 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); | |
8020 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); | |
8021 } | |
8022 } | |
8023 | |
8024 gofast_maybe_init_libfuncs (); | |
8025 } | |
8026 | |
8027 #define def_builtin(NAME, CODE, TYPE) \ | |
8028 add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \ | |
8029 NULL_TREE) | |
8030 | |
8031 /* Implement the TARGET_INIT_BUILTINS target hook. | |
8032 Create builtin functions for special SPARC instructions. */ | |
8033 | |
8034 static void | |
8035 sparc_init_builtins (void) | |
8036 { | |
8037 if (TARGET_VIS) | |
8038 sparc_vis_init_builtins (); | |
8039 } | |
8040 | |
8041 /* Create builtin functions for VIS 1.0 instructions. */ | |
8042 | |
8043 static void | |
8044 sparc_vis_init_builtins (void) | |
8045 { | |
8046 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); | |
8047 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); | |
8048 tree v4hi = build_vector_type (intHI_type_node, 4); | |
8049 tree v2hi = build_vector_type (intHI_type_node, 2); | |
8050 tree v2si = build_vector_type (intSI_type_node, 2); | |
8051 | |
8052 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); | |
8053 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); | |
8054 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); | |
8055 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); | |
8056 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); | |
8057 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); | |
8058 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); | |
8059 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); | |
8060 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); | |
8061 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); | |
8062 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); | |
8063 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); | |
8064 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, | |
8065 v8qi, v8qi, | |
8066 intDI_type_node, 0); | |
8067 tree di_ftype_di_di = build_function_type_list (intDI_type_node, | |
8068 intDI_type_node, | |
8069 intDI_type_node, 0); | |
8070 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, | |
8071 ptr_type_node, | |
8072 intSI_type_node, 0); | |
8073 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, | |
8074 ptr_type_node, | |
8075 intDI_type_node, 0); | |
8076 | |
8077 /* Packing and expanding vectors. */ | |
8078 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi); | |
8079 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, | |
8080 v8qi_ftype_v2si_v8qi); | |
8081 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, | |
8082 v2hi_ftype_v2si); | |
8083 def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi); | |
8084 def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, | |
8085 v8qi_ftype_v4qi_v4qi); | |
8086 | |
8087 /* Multiplications. */ | |
8088 def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, | |
8089 v4hi_ftype_v4qi_v4hi); | |
8090 def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, | |
8091 v4hi_ftype_v4qi_v2hi); | |
8092 def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, | |
8093 v4hi_ftype_v4qi_v2hi); | |
8094 def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, | |
8095 v4hi_ftype_v8qi_v4hi); | |
8096 def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, | |
8097 v4hi_ftype_v8qi_v4hi); | |
8098 def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, | |
8099 v2si_ftype_v4qi_v2hi); | |
8100 def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, | |
8101 v2si_ftype_v4qi_v2hi); | |
8102 | |
8103 /* Data aligning. */ | |
8104 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, | |
8105 v4hi_ftype_v4hi_v4hi); | |
8106 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, | |
8107 v8qi_ftype_v8qi_v8qi); | |
8108 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, | |
8109 v2si_ftype_v2si_v2si); | |
8110 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis, | |
8111 di_ftype_di_di); | |
8112 if (TARGET_ARCH64) | |
8113 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, | |
8114 ptr_ftype_ptr_di); | |
8115 else | |
8116 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, | |
8117 ptr_ftype_ptr_si); | |
8118 | |
8119 /* Pixel distance. */ | |
8120 def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis, | |
8121 di_ftype_v8qi_v8qi_di); | |
8122 } | |
8123 | |
8124 /* Handle TARGET_EXPAND_BUILTIN target hook. | |
8125 Expand builtin functions for SPARC intrinsics. */ | |
8126 | |
8127 static rtx | |
8128 sparc_expand_builtin (tree exp, rtx target, | |
8129 rtx subtarget ATTRIBUTE_UNUSED, | |
8130 enum machine_mode tmode ATTRIBUTE_UNUSED, | |
8131 int ignore ATTRIBUTE_UNUSED) | |
8132 { | |
8133 tree arg; | |
8134 call_expr_arg_iterator iter; | |
8135 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
8136 unsigned int icode = DECL_FUNCTION_CODE (fndecl); | |
8137 rtx pat, op[4]; | |
8138 enum machine_mode mode[4]; | |
8139 int arg_count = 0; | |
8140 | |
8141 mode[0] = insn_data[icode].operand[0].mode; | |
8142 if (!target | |
8143 || GET_MODE (target) != mode[0] | |
8144 || ! (*insn_data[icode].operand[0].predicate) (target, mode[0])) | |
8145 op[0] = gen_reg_rtx (mode[0]); | |
8146 else | |
8147 op[0] = target; | |
8148 | |
8149 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) | |
8150 { | |
8151 arg_count++; | |
8152 mode[arg_count] = insn_data[icode].operand[arg_count].mode; | |
8153 op[arg_count] = expand_normal (arg); | |
8154 | |
8155 if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count], | |
8156 mode[arg_count])) | |
8157 op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]); | |
8158 } | |
8159 | |
8160 switch (arg_count) | |
8161 { | |
8162 case 1: | |
8163 pat = GEN_FCN (icode) (op[0], op[1]); | |
8164 break; | |
8165 case 2: | |
8166 pat = GEN_FCN (icode) (op[0], op[1], op[2]); | |
8167 break; | |
8168 case 3: | |
8169 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); | |
8170 break; | |
8171 default: | |
8172 gcc_unreachable (); | |
8173 } | |
8174 | |
8175 if (!pat) | |
8176 return NULL_RTX; | |
8177 | |
8178 emit_insn (pat); | |
8179 | |
8180 return op[0]; | |
8181 } | |
8182 | |
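/* Compile-time model of the VIS fmul8x16 element operation: multiply an
   unsigned 8-bit value by a 16-bit value and round the 24-bit product to
   its upper 16 bits.  */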
8183 static int | |
8184 sparc_vis_mul8x16 (int e8, int e16) | |
8185 { | |
8186 return (e8 * e16 + 128) / 256; | |
8187 } | |
8188 | |
8189 /* Multiply the vector elements in ELTS0 by the elements in ELTS1, as | |
8190 specified by FNCODE. All of the elements in the ELTS0 and ELTS1 lists must | |
8191 be integer constants. Return a tree list with the results of the | |
8192 multiplications; each element in the list has type INNER_TYPE. */ | |
8193 | |
8194 static tree | |
8195 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1) | |
8196 { | |
8197 tree n_elts = NULL_TREE; | |
8198 int scale; | |
8199 | |
8200 switch (fncode) | |
8201 { | |
8202 case CODE_FOR_fmul8x16_vis: | |
8203 for (; elts0 && elts1; | |
8204 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) | |
8205 { | |
8206 int val | |
8207 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), | |
8208 TREE_INT_CST_LOW (TREE_VALUE (elts1))); | |
8209 n_elts = tree_cons (NULL_TREE, | |
8210 build_int_cst (inner_type, val), | |
8211 n_elts); | |
8212 } | |
8213 break; | |
8214 | |
8215 case CODE_FOR_fmul8x16au_vis: | |
8216 scale = TREE_INT_CST_LOW (TREE_VALUE (elts1)); | |
8217 | |
8218 for (; elts0; elts0 = TREE_CHAIN (elts0)) | |
8219 { | |
8220 int val | |
8221 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), | |
8222 scale); | |
8223 n_elts = tree_cons (NULL_TREE, | |
8224 build_int_cst (inner_type, val), | |
8225 n_elts); | |
8226 } | |
8227 break; | |
8228 | |
8229 case CODE_FOR_fmul8x16al_vis: | |
8230 scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1))); | |
8231 | |
8232 for (; elts0; elts0 = TREE_CHAIN (elts0)) | |
8233 { | |
8234 int val | |
8235 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)), | |
8236 scale); | |
8237 n_elts = tree_cons (NULL_TREE, | |
8238 build_int_cst (inner_type, val), | |
8239 n_elts); | |
8240 } | |
8241 break; | |
8242 | |
8243 default: | |
8244 gcc_unreachable (); | |
8245 } | |
8246 | |
8247 return nreverse (n_elts); | |
8248 } | |
8249 | |
8250 /* Handle TARGET_FOLD_BUILTIN target hook. | |
8251 Fold builtin functions for SPARC intrinsics. If IGNORE is true the | |
8252 result of the function call is ignored. NULL_TREE is returned if the | |
8253 function could not be folded. */ | |
8254 | |
8255 static tree | |
8256 sparc_fold_builtin (tree fndecl, tree arglist, bool ignore) | |
8257 { | |
8258 tree arg0, arg1, arg2; | |
8259 tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); | |
8260 | |
8261 if (ignore | |
8262 && DECL_FUNCTION_CODE (fndecl) != CODE_FOR_alignaddrsi_vis | |
8263 && DECL_FUNCTION_CODE (fndecl) != CODE_FOR_alignaddrdi_vis) | |
8264 return fold_convert (rtype, integer_zero_node); | |
8265 | |
8266 switch (DECL_FUNCTION_CODE (fndecl)) | |
8267 { | |
8268 case CODE_FOR_fexpand_vis: | |
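/* fexpand widens each 8-bit element into a 16-bit fixed-point value by
   shifting it left 4 bits, so a constant vector {1, 2, 3, 4} folds to
   {16, 32, 48, 64}.  */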
8269 arg0 = TREE_VALUE (arglist); | |
8270 STRIP_NOPS (arg0); | |
8271 | |
8272 if (TREE_CODE (arg0) == VECTOR_CST) | |
8273 { | |
8274 tree inner_type = TREE_TYPE (rtype); | |
8275 tree elts = TREE_VECTOR_CST_ELTS (arg0); | |
8276 tree n_elts = NULL_TREE; | |
8277 | |
8278 for (; elts; elts = TREE_CHAIN (elts)) | |
8279 { | |
8280 unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4; | |
8281 n_elts = tree_cons (NULL_TREE, | |
8282 build_int_cst (inner_type, val), | |
8283 n_elts); | |
8284 } | |
8285 return build_vector (rtype, nreverse (n_elts)); | |
8286 } | |
8287 break; | |
8288 | |
8289 case CODE_FOR_fmul8x16_vis: | |
8290 case CODE_FOR_fmul8x16au_vis: | |
8291 case CODE_FOR_fmul8x16al_vis: | |
8292 arg0 = TREE_VALUE (arglist); | |
8293 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); | |
8294 STRIP_NOPS (arg0); | |
8295 STRIP_NOPS (arg1); | |
8296 | |
8297 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) | |
8298 { | |
8299 tree inner_type = TREE_TYPE (rtype); | |
8300 tree elts0 = TREE_VECTOR_CST_ELTS (arg0); | |
8301 tree elts1 = TREE_VECTOR_CST_ELTS (arg1); | |
8302 tree n_elts = sparc_handle_vis_mul8x16 (DECL_FUNCTION_CODE (fndecl), | |
8303 inner_type, elts0, elts1); | |
8304 | |
8305 return build_vector (rtype, n_elts); | |
8306 } | |
8307 break; | |
8308 | |
8309 case CODE_FOR_fpmerge_vis: | |
8310 arg0 = TREE_VALUE (arglist); | |
8311 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); | |
8312 STRIP_NOPS (arg0); | |
8313 STRIP_NOPS (arg1); | |
8314 | |
8315 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) | |
8316 { | |
8317 tree elts0 = TREE_VECTOR_CST_ELTS (arg0); | |
8318 tree elts1 = TREE_VECTOR_CST_ELTS (arg1); | |
8319 tree n_elts = NULL_TREE; | |
8320 | |
8321 for (; elts0 && elts1; | |
8322 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) | |
8323 { | |
8324 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts); | |
8325 n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts); | |
8326 } | |
8327 | |
8328 return build_vector (rtype, nreverse (n_elts)); | |
8329 } | |
8330 break; | |
8331 | |
8332 case CODE_FOR_pdist_vis: | |
8333 arg0 = TREE_VALUE (arglist); | |
8334 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); | |
8335 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); | |
8336 STRIP_NOPS (arg0); | |
8337 STRIP_NOPS (arg1); | |
8338 STRIP_NOPS (arg2); | |
8339 | |
8340 if (TREE_CODE (arg0) == VECTOR_CST | |
8341 && TREE_CODE (arg1) == VECTOR_CST | |
8342 && TREE_CODE (arg2) == INTEGER_CST) | |
8343 { | |
8344 int overflow = 0; | |
8345 unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2); | |
8346 HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2); | |
8347 tree elts0 = TREE_VECTOR_CST_ELTS (arg0); | |
8348 tree elts1 = TREE_VECTOR_CST_ELTS (arg1); | |
8349 | |
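/* Accumulate abs (e0 - e1) for each pair of elements into the double-word
   accumulator (LOW, HIGH), mirroring what the pdist instruction computes.  */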
8350 for (; elts0 && elts1; | |
8351 elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1)) | |
8352 { | |
8353 unsigned HOST_WIDE_INT | |
8354 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)), | |
8355 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1)); | |
8356 HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0)); | |
8357 HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1)); | |
8358 | |
8359 unsigned HOST_WIDE_INT l; | |
8360 HOST_WIDE_INT h; | |
8361 | |
8362 overflow |= neg_double (low1, high1, &l, &h); | |
8363 overflow |= add_double (low0, high0, l, h, &l, &h); | |
8364 if (h < 0) | |
8365 overflow |= neg_double (l, h, &l, &h); | |
8366 | |
8367 overflow |= add_double (low, high, l, h, &low, &high); | |
8368 } | |
8369 | |
8370 gcc_assert (overflow == 0); | |
8371 | |
8372 return build_int_cst_wide (rtype, low, high); | |
8373 } | |
8374 | |
8375 default: | |
8376 break; | |
8377 } | |
8378 | |
8379 return NULL_TREE; | |
8380 } | |
8381 | |
8382 /* ??? This duplicates information provided to the compiler by the | |
8383 ??? scheduler description. Some day, teach genautomata to output | |
8384 ??? the latencies and then CSE will just use that. */ | |
8385 | |
8386 static bool | |
8387 sparc_rtx_costs (rtx x, int code, int outer_code, int *total, | |
8388 bool speed ATTRIBUTE_UNUSED) | |
8389 { | |
8390 enum machine_mode mode = GET_MODE (x); | |
8391 bool float_mode_p = FLOAT_MODE_P (mode); | |
8392 | |
8393 switch (code) | |
8394 { | |
8395 case CONST_INT: | |
8396 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000) | |
8397 { | |
8398 *total = 0; | |
8399 return true; | |
8400 } | |
8401 /* FALLTHRU */ | |
8402 | |
8403 case HIGH: | |
8404 *total = 2; | |
8405 return true; | |
8406 | |
8407 case CONST: | |
8408 case LABEL_REF: | |
8409 case SYMBOL_REF: | |
8410 *total = 4; | |
8411 return true; | |
8412 | |
8413 case CONST_DOUBLE: | |
8414 if (GET_MODE (x) == VOIDmode | |
8415 && ((CONST_DOUBLE_HIGH (x) == 0 | |
8416 && CONST_DOUBLE_LOW (x) < 0x1000) | |
8417 || (CONST_DOUBLE_HIGH (x) == -1 | |
8418 && CONST_DOUBLE_LOW (x) < 0 | |
8419 && CONST_DOUBLE_LOW (x) >= -0x1000))) | |
8420 *total = 0; | |
8421 else | |
8422 *total = 8; | |
8423 return true; | |
8424 | |
8425 case MEM: | |
8426 /* If outer-code was a sign or zero extension, a cost | |
8427 of COSTS_N_INSNS (1) was already added in. This is | |
8428 why we are subtracting it back out. */ | |
8429 if (outer_code == ZERO_EXTEND) | |
8430 { | |
8431 *total = sparc_costs->int_zload - COSTS_N_INSNS (1); | |
8432 } | |
8433 else if (outer_code == SIGN_EXTEND) | |
8434 { | |
8435 *total = sparc_costs->int_sload - COSTS_N_INSNS (1); | |
8436 } | |
8437 else if (float_mode_p) | |
8438 { | |
8439 *total = sparc_costs->float_load; | |
8440 } | |
8441 else | |
8442 { | |
8443 *total = sparc_costs->int_load; | |
8444 } | |
8445 | |
8446 return true; | |
8447 | |
8448 case PLUS: | |
8449 case MINUS: | |
8450 if (float_mode_p) | |
8451 *total = sparc_costs->float_plusminus; | |
8452 else | |
8453 *total = COSTS_N_INSNS (1); | |
8454 return false; | |
8455 | |
8456 case MULT: | |
8457 if (float_mode_p) | |
8458 *total = sparc_costs->float_mul; | |
8459 else if (! TARGET_HARD_MUL) | |
8460 *total = COSTS_N_INSNS (25); | |
8461 else | |
8462 { | |
8463 int bit_cost; | |
8464 | |
8465 bit_cost = 0; | |
8466 if (sparc_costs->int_mul_bit_factor) | |
8467 { | |
8468 int nbits; | |
8469 | |
8470 if (GET_CODE (XEXP (x, 1)) == CONST_INT) | |
8471 { | |
8472 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); | |
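/* Count the set bits in the constant multiplier; each iteration clears
   the lowest set bit.  */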
8473 for (nbits = 0; value != 0; value &= value - 1) | |
8474 nbits++; | |
8475 } | |
8476 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE | |
8477 && GET_MODE (XEXP (x, 1)) == VOIDmode) | |
8478 { | |
8479 rtx x1 = XEXP (x, 1); | |
8480 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1); | |
8481 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1); | |
8482 | |
8483 for (nbits = 0; value1 != 0; value1 &= value1 - 1) | |
8484 nbits++; | |
8485 for (; value2 != 0; value2 &= value2 - 1) | |
8486 nbits++; | |
8487 } | |
8488 else | |
8489 nbits = 7; | |
8490 | |
8491 if (nbits < 3) | |
8492 nbits = 3; | |
8493 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; | |
8494 bit_cost = COSTS_N_INSNS (bit_cost); | |
8495 } | |
8496 | |
8497 if (mode == DImode) | |
8498 *total = sparc_costs->int_mulX + bit_cost; | |
8499 else | |
8500 *total = sparc_costs->int_mul + bit_cost; | |
8501 } | |
8502 return false; | |
8503 | |
8504 case ASHIFT: | |
8505 case ASHIFTRT: | |
8506 case LSHIFTRT: | |
8507 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; | |
8508 return false; | |
8509 | |
8510 case DIV: | |
8511 case UDIV: | |
8512 case MOD: | |
8513 case UMOD: | |
8514 if (float_mode_p) | |
8515 { | |
8516 if (mode == DFmode) | |
8517 *total = sparc_costs->float_div_df; | |
8518 else | |
8519 *total = sparc_costs->float_div_sf; | |
8520 } | |
8521 else | |
8522 { | |
8523 if (mode == DImode) | |
8524 *total = sparc_costs->int_divX; | |
8525 else | |
8526 *total = sparc_costs->int_div; | |
8527 } | |
8528 return false; | |
8529 | |
8530 case NEG: | |
8531 if (! float_mode_p) | |
8532 { | |
8533 *total = COSTS_N_INSNS (1); | |
8534 return false; | |
8535 } | |
8536 /* FALLTHRU */ | |
8537 | |
8538 case ABS: | |
8539 case FLOAT: | |
8540 case UNSIGNED_FLOAT: | |
8541 case FIX: | |
8542 case UNSIGNED_FIX: | |
8543 case FLOAT_EXTEND: | |
8544 case FLOAT_TRUNCATE: | |
8545 *total = sparc_costs->float_move; | |
8546 return false; | |
8547 | |
8548 case SQRT: | |
8549 if (mode == DFmode) | |
8550 *total = sparc_costs->float_sqrt_df; | |
8551 else | |
8552 *total = sparc_costs->float_sqrt_sf; | |
8553 return false; | |
8554 | |
8555 case COMPARE: | |
8556 if (float_mode_p) | |
8557 *total = sparc_costs->float_cmp; | |
8558 else | |
8559 *total = COSTS_N_INSNS (1); | |
8560 return false; | |
8561 | |
8562 case IF_THEN_ELSE: | |
8563 if (float_mode_p) | |
8564 *total = sparc_costs->float_cmove; | |
8565 else | |
8566 *total = sparc_costs->int_cmove; | |
8567 return false; | |
8568 | |
8569 case IOR: | |
8570 /* Handle the NAND vector patterns. */ | |
8571 if (sparc_vector_mode_supported_p (GET_MODE (x)) | |
8572 && GET_CODE (XEXP (x, 0)) == NOT | |
8573 && GET_CODE (XEXP (x, 1)) == NOT) | |
8574 { | |
8575 *total = COSTS_N_INSNS (1); | |
8576 return true; | |
8577 } | |
8578 else | |
8579 return false; | |
8580 | |
8581 default: | |
8582 return false; | |
8583 } | |
8584 } | |
8585 | |
8586 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2. | |
8587 This is achieved by means of a manual dynamic stack space allocation in | |
8588 the current frame. We make the assumption that SEQ doesn't contain any | |
8589 function calls, with the possible exception of calls to the PIC helper. */ | |
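
/* A sketch of the sequence emitted below (illustrative only, not the exact
   RTL or assembly):

     %sp -= SIZE                        manual stack allocation
     [slot]            = REG
     [slot + wordsize] = REG2           only if REG2 is given
     ... SEQ ...
     REG2 = [slot + wordsize]           only if REG2 is given
     REG  = [slot]
     %sp += SIZE  */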
8590 | |
8591 static void | |
8592 emit_and_preserve (rtx seq, rtx reg, rtx reg2) | |
8593 { | |
8594 /* We must preserve the lowest 16 words for the register save area. */ | |
8595 HOST_WIDE_INT offset = 16*UNITS_PER_WORD; | |
8596 /* We really need only 2 words of fresh stack space. */ | |
8597 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); | |
8598 | |
8599 rtx slot | |
8600 = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx, | |
8601 SPARC_STACK_BIAS + offset)); | |
8602 | |
8603 emit_insn (gen_stack_pointer_dec (GEN_INT (size))); | |
8604 emit_insn (gen_rtx_SET (VOIDmode, slot, reg)); | |
8605 if (reg2) | |
8606 emit_insn (gen_rtx_SET (VOIDmode, | |
8607 adjust_address (slot, word_mode, UNITS_PER_WORD), | |
8608 reg2)); | |
8609 emit_insn (seq); | |
8610 if (reg2) | |
8611 emit_insn (gen_rtx_SET (VOIDmode, | |
8612 reg2, | |
8613 adjust_address (slot, word_mode, UNITS_PER_WORD))); | |
8614 emit_insn (gen_rtx_SET (VOIDmode, reg, slot)); | |
8615 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); | |
8616 } | |
8617 | |
8618 /* Output the assembler code for a thunk function. THUNK_DECL is the | |
8619 declaration for the thunk function itself, FUNCTION is the decl for | |
8620 the target function. DELTA is an immediate constant offset to be | |
8621 added to THIS. If VCALL_OFFSET is nonzero, the word at address | |
8622 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */ | |
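
/* In C-like pseudocode, the thunk emitted below behaves as the following
   illustrative sketch (FUNCTION is reached as a tail call):

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, other arguments unchanged);  */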
8623 | |
8624 static void | |
8625 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, | |
8626 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, | |
8627 tree function) | |
8628 { | |
8629 rtx this_rtx, insn, funexp; | |
8630 unsigned int int_arg_first; | |
8631 | |
8632 reload_completed = 1; | |
8633 epilogue_completed = 1; | |
8634 | |
8635 emit_note (NOTE_INSN_PROLOGUE_END); | |
8636 | |
8637 if (flag_delayed_branch) | |
8638 { | |
8639 /* We will emit a regular sibcall below, so we need to instruct | |
8640 output_sibcall that we are in a leaf function. */ | |
8641 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1; | |
8642 | |
8643 /* This will cause final.c to invoke leaf_renumber_regs so we | |
8644 must behave as if we were in a not-yet-leafified function. */ | |
8645 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; | |
8646 } | |
8647 else | |
8648 { | |
8649 /* We will emit the sibcall manually below, so we will need to | |
8650 manually spill non-leaf registers. */ | |
8651 sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0; | |
8652 | |
8653 /* We really are in a leaf function. */ | |
8654 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; | |
8655 } | |
8656 | |
8657 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function | |
8658 returns a structure, the structure return pointer is there instead. */ | |
8659 if (TARGET_ARCH64 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) | |
8660 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); | |
8661 else | |
8662 this_rtx = gen_rtx_REG (Pmode, int_arg_first); | |
8663 | |
8664 /* Add DELTA. When possible use a plain add, otherwise load it into | |
8665 a register first. */ | |
8666 if (delta) | |
8667 { | |
8668 rtx delta_rtx = GEN_INT (delta); | |
8669 | |
8670 if (! SPARC_SIMM13_P (delta)) | |
8671 { | |
8672 rtx scratch = gen_rtx_REG (Pmode, 1); | |
8673 emit_move_insn (scratch, delta_rtx); | |
8674 delta_rtx = scratch; | |
8675 } | |
8676 | |
8677 /* THIS_RTX += DELTA. */ | |
8678 emit_insn (gen_add2_insn (this_rtx, delta_rtx)); | |
8679 } | |
8680 | |
8681 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ | |
8682 if (vcall_offset) | |
8683 { | |
8684 rtx vcall_offset_rtx = GEN_INT (vcall_offset); | |
8685 rtx scratch = gen_rtx_REG (Pmode, 1); | |
8686 | |
8687 gcc_assert (vcall_offset < 0); | |
8688 | |
8689 /* SCRATCH = *THIS_RTX. */ | |
8690 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); | |
8691 | |
8692 /* Prepare for adding VCALL_OFFSET. The difficulty is that we | |
8693 may not have any available scratch register at this point. */ | |
8694 if (SPARC_SIMM13_P (vcall_offset)) | |
8695 ; | |
8696 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ | |
8697 else if (! fixed_regs[5] | |
8698 /* The below sequence is made up of at least 2 insns, | |
8699 while the default method may need only one. */ | |
8700 && vcall_offset < -8192) | |
8701 { | |
8702 rtx scratch2 = gen_rtx_REG (Pmode, 5); | |
8703 emit_move_insn (scratch2, vcall_offset_rtx); | |
8704 vcall_offset_rtx = scratch2; | |
8705 } | |
8706 else | |
8707 { | |
8708 rtx increment = GEN_INT (-4096); | |
8709 | |
8710 /* VCALL_OFFSET is a negative number whose typical range can be | |
8711 estimated as -32768..0 in 32-bit mode. In almost all cases | |
8712 it is therefore cheaper to emit multiple add insns than | |
8713 spilling and loading the constant into a register (at least | |
8714 6 insns). */ | |
8715 while (! SPARC_SIMM13_P (vcall_offset)) | |
8716 { | |
8717 emit_insn (gen_add2_insn (scratch, increment)); | |
8718 vcall_offset += 4096; | |
8719 } | |
8720 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ | |
8721 } | |
8722 | |
8723 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ | |
8724 emit_move_insn (scratch, gen_rtx_MEM (Pmode, | |
8725 gen_rtx_PLUS (Pmode, | |
8726 scratch, | |
8727 vcall_offset_rtx))); | |
8728 | |
8729 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ | |
8730 emit_insn (gen_add2_insn (this_rtx, scratch)); | |
8731 } | |
8732 | |
8733 /* Generate a tail call to the target function. */ | |
8734 if (! TREE_USED (function)) | |
8735 { | |
8736 assemble_external (function); | |
8737 TREE_USED (function) = 1; | |
8738 } | |
8739 funexp = XEXP (DECL_RTL (function), 0); | |
8740 | |
8741 if (flag_delayed_branch) | |
8742 { | |
8743 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); | |
8744 insn = emit_call_insn (gen_sibcall (funexp)); | |
8745 SIBLING_CALL_P (insn) = 1; | |
8746 } | |
8747 else | |
8748 { | |
8749 /* The hoops we have to jump through in order to generate a sibcall | |
8750 without using delay slots... */ | |
8751 rtx spill_reg, spill_reg2, seq, scratch = gen_rtx_REG (Pmode, 1); | |
8752 | |
8753 if (flag_pic) | |
8754 { | |
8755 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ | |
8756 spill_reg2 = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM); | |
8757 start_sequence (); | |
8758 /* Delay emitting the PIC helper function because it needs to | |
8759 change the section and we are emitting assembly code. */ | |
8760 load_pic_register (true); /* clobbers %o7 */ | |
8761 scratch = legitimize_pic_address (funexp, Pmode, scratch); | |
8762 seq = get_insns (); | |
8763 end_sequence (); | |
8764 emit_and_preserve (seq, spill_reg, spill_reg2); | |
8765 } | |
8766 else if (TARGET_ARCH32) | |
8767 { | |
8768 emit_insn (gen_rtx_SET (VOIDmode, | |
8769 scratch, | |
8770 gen_rtx_HIGH (SImode, funexp))); | |
8771 emit_insn (gen_rtx_SET (VOIDmode, | |
8772 scratch, | |
8773 gen_rtx_LO_SUM (SImode, scratch, funexp))); | |
8774 } | |
8775 else /* TARGET_ARCH64 */ | |
8776 { | |
8777 switch (sparc_cmodel) | |
8778 { | |
8779 case CM_MEDLOW: | |
8780 case CM_MEDMID: | |
8781 /* The destination can serve as a temporary. */ | |
8782 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); | |
8783 break; | |
8784 | |
8785 case CM_MEDANY: | |
8786 case CM_EMBMEDANY: | |
8787 /* The destination cannot serve as a temporary. */ | |
8788 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ | |
8789 start_sequence (); | |
8790 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); | |
8791 seq = get_insns (); | |
8792 end_sequence (); | |
8793 emit_and_preserve (seq, spill_reg, 0); | |
8794 break; | |
8795 | |
8796 default: | |
8797 gcc_unreachable (); | |
8798 } | |
8799 } | |
8800 | |
8801 emit_jump_insn (gen_indirect_jump (scratch)); | |
8802 } | |
8803 | |
8804 emit_barrier (); | |
8805 | |
8806 /* Run just enough of rest_of_compilation to get the insns emitted. | |
8807 There's not really enough bulk here to make other passes such as | |
8808 instruction scheduling worthwhile. Note that use_thunk calls | |
8809 assemble_start_function and assemble_end_function. */ | |
8810 insn = get_insns (); | |
8811 insn_locators_alloc (); | |
8812 shorten_branches (insn); | |
8813 final_start_function (insn, file, 1); | |
8814 final (insn, file, 1); | |
8815 final_end_function (); | |
8816 free_after_compilation (cfun); | |
8817 | |
8818 reload_completed = 0; | |
8819 epilogue_completed = 0; | |
8820 } | |
8821 | |
8822 /* Return true if sparc_output_mi_thunk would be able to output the | |
8823 assembler code for the thunk function specified by the arguments | |
8824 it is passed, and false otherwise. */ | |
8825 static bool | |
8826 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, | |
8827 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, | |
8828 HOST_WIDE_INT vcall_offset, | |
8829 const_tree function ATTRIBUTE_UNUSED) | |
8830 { | |
8831 /* Bound the loop used in the default method above. */ | |
8832 return (vcall_offset >= -32768 || ! fixed_regs[5]); | |
8833 } | |
8834 | |
8835 /* How to allocate a 'struct machine_function'. */ | |
8836 | |
8837 static struct machine_function * | |
8838 sparc_init_machine_status (void) | |
8839 { | |
8840 return GGC_CNEW (struct machine_function); | |
8841 } | |
8842 | |
8843 /* Locate some local-dynamic symbol still in use by this function | |
8844 so that we can print its name in local-dynamic base patterns. */ | |
8845 | |
8846 static const char * | |
8847 get_some_local_dynamic_name (void) | |
8848 { | |
8849 rtx insn; | |
8850 | |
8851 if (cfun->machine->some_ld_name) | |
8852 return cfun->machine->some_ld_name; | |
8853 | |
8854 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) | |
8855 if (INSN_P (insn) | |
8856 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) | |
8857 return cfun->machine->some_ld_name; | |
8858 | |
8859 gcc_unreachable (); | |
8860 } | |
8861 | |
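/* Callback for for_each_rtx in get_some_local_dynamic_name: if *PX is a
   local-dynamic TLS SYMBOL_REF, record its name and stop the traversal.  */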
8862 static int | |
8863 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) | |
8864 { | |
8865 rtx x = *px; | |
8866 | |
8867 if (x | |
8868 && GET_CODE (x) == SYMBOL_REF | |
8869 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) | |
8870 { | |
8871 cfun->machine->some_ld_name = XSTR (x, 0); | |
8872 return 1; | |
8873 } | |
8874 | |
8875 return 0; | |
8876 } | |
8877 | |
8878 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. | |
8879 This is called from dwarf2out.c to emit call frame instructions | |
8880 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ | |
8881 static void | |
8882 sparc_dwarf_handle_frame_unspec (const char *label, | |
8883 rtx pattern ATTRIBUTE_UNUSED, | |
8884 int index ATTRIBUTE_UNUSED) | |
8885 { | |
8886 gcc_assert (index == UNSPECV_SAVEW); | |
8887 dwarf2out_window_save (label); | |
8888 } | |
8889 | |
8890 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. | |
8891 We need to emit DTP-relative relocations. */ | |
8892 | |
8893 static void | |
8894 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x) | |
8895 { | |
8896 switch (size) | |
8897 { | |
8898 case 4: | |
8899 fputs ("\t.word\t%r_tls_dtpoff32(", file); | |
8900 break; | |
8901 case 8: | |
8902 fputs ("\t.xword\t%r_tls_dtpoff64(", file); | |
8903 break; | |
8904 default: | |
8905 gcc_unreachable (); | |
8906 } | |
8907 output_addr_const (file, x); | |
8908 fputs (")", file); | |
8909 } | |
8910 | |
8911 /* Do whatever processing is required at the end of a file. */ | |
8912 | |
8913 static void | |
8914 sparc_file_end (void) | |
8915 { | |
8916 /* If we haven't emitted the special PIC helper function, do so now. */ | |
8917 if (pic_helper_symbol_name[0] && !pic_helper_emitted_p) | |
8918 emit_pic_helper (); | |
8919 | |
8920 if (NEED_INDICATE_EXEC_STACK) | |
8921 file_end_indicate_exec_stack (); | |
8922 } | |
8923 | |
8924 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING | |
8925 /* Implement TARGET_MANGLE_TYPE. */ | |
8926 | |
8927 static const char * | |
8928 sparc_mangle_type (const_tree type) | |
8929 { | |
8930 if (!TARGET_64BIT | |
8931 && TYPE_MAIN_VARIANT (type) == long_double_type_node | |
8932 && TARGET_LONG_DOUBLE_128) | |
8933 return "g"; | |
8934 | |
8935 /* For all other types, use normal C++ mangling. */ | |
8936 return NULL; | |
8937 } | |
8938 #endif | |
8939 | |
8940 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a | |
8941 32-bit compare and swap on the word containing the byte or half-word. */ | |
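
/* An illustrative C sketch of the expansion (cas32 stands for the 32-bit
   sync_compare_and_swapsi pattern emitted below; this is not the exact RTL):

     word  = addr & -4;                          aligned containing word
     shift = ((addr & 3) ^ (QImode ? 3 : 2)) * 8;
     mask  = (QImode ? 0xff : 0xffff) << shift;
     oldv  = (oldval << shift) & mask;
     newv  = (newval << shift) & mask;
     background = *word & ~mask;
     for (;;)
       {
         res = cas32 (word, background | oldv, background | newv);
         if (res == (background | oldv))
           break;                                swap succeeded
         if ((res & ~mask) == background)
           break;                                only our bytes differed: genuine failure
         background = res & ~mask;               other bytes changed: retry
       }
     result = (res & mask) >> shift;  */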
8942 | |
8943 void | |
8944 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval) | |
8945 { | |
8946 rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); | |
8947 rtx addr = gen_reg_rtx (Pmode); | |
8948 rtx off = gen_reg_rtx (SImode); | |
8949 rtx oldv = gen_reg_rtx (SImode); | |
8950 rtx newv = gen_reg_rtx (SImode); | |
8951 rtx oldvalue = gen_reg_rtx (SImode); | |
8952 rtx newvalue = gen_reg_rtx (SImode); | |
8953 rtx res = gen_reg_rtx (SImode); | |
8954 rtx resv = gen_reg_rtx (SImode); | |
8955 rtx memsi, val, mask, end_label, loop_label, cc; | |
8956 | |
8957 emit_insn (gen_rtx_SET (VOIDmode, addr, | |
8958 gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); | |
8959 | |
8960 if (Pmode != SImode) | |
8961 addr1 = gen_lowpart (SImode, addr1); | |
8962 emit_insn (gen_rtx_SET (VOIDmode, off, | |
8963 gen_rtx_AND (SImode, addr1, GEN_INT (3)))); | |
8964 | |
8965 memsi = gen_rtx_MEM (SImode, addr); | |
8966 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); | |
8967 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); | |
8968 | |
8969 val = force_reg (SImode, memsi); | |
8970 | |
8971 emit_insn (gen_rtx_SET (VOIDmode, off, | |
8972 gen_rtx_XOR (SImode, off, | |
8973 GEN_INT (GET_MODE (mem) == QImode | |
8974 ? 3 : 2)))); | |
8975 | |
8976 emit_insn (gen_rtx_SET (VOIDmode, off, | |
8977 gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); | |
8978 | |
8979 if (GET_MODE (mem) == QImode) | |
8980 mask = force_reg (SImode, GEN_INT (0xff)); | |
8981 else | |
8982 mask = force_reg (SImode, GEN_INT (0xffff)); | |
8983 | |
8984 emit_insn (gen_rtx_SET (VOIDmode, mask, | |
8985 gen_rtx_ASHIFT (SImode, mask, off))); | |
8986 | |
8987 emit_insn (gen_rtx_SET (VOIDmode, val, | |
8988 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), | |
8989 val))); | |
8990 | |
8991 oldval = gen_lowpart (SImode, oldval); | |
8992 emit_insn (gen_rtx_SET (VOIDmode, oldv, | |
8993 gen_rtx_ASHIFT (SImode, oldval, off))); | |
8994 | |
8995 newval = gen_lowpart_common (SImode, newval); | |
8996 emit_insn (gen_rtx_SET (VOIDmode, newv, | |
8997 gen_rtx_ASHIFT (SImode, newval, off))); | |
8998 | |
8999 emit_insn (gen_rtx_SET (VOIDmode, oldv, | |
9000 gen_rtx_AND (SImode, oldv, mask))); | |
9001 | |
9002 emit_insn (gen_rtx_SET (VOIDmode, newv, | |
9003 gen_rtx_AND (SImode, newv, mask))); | |
9004 | |
9005 end_label = gen_label_rtx (); | |
9006 loop_label = gen_label_rtx (); | |
9007 emit_label (loop_label); | |
9008 | |
9009 emit_insn (gen_rtx_SET (VOIDmode, oldvalue, | |
9010 gen_rtx_IOR (SImode, oldv, val))); | |
9011 | |
9012 emit_insn (gen_rtx_SET (VOIDmode, newvalue, | |
9013 gen_rtx_IOR (SImode, newv, val))); | |
9014 | |
9015 emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue)); | |
9016 | |
9017 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label); | |
9018 | |
9019 emit_insn (gen_rtx_SET (VOIDmode, resv, | |
9020 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), | |
9021 res))); | |
9022 | |
9023 sparc_compare_op0 = resv; | |
9024 sparc_compare_op1 = val; | |
9025 cc = gen_compare_reg (NE); | |
9026 | |
9027 emit_insn (gen_rtx_SET (VOIDmode, val, resv)); | |
9028 | |
9029 sparc_compare_emitted = cc; | |
9030 emit_jump_insn (gen_bne (loop_label)); | |
9031 | |
9032 emit_label (end_label); | |
9033 | |
9034 emit_insn (gen_rtx_SET (VOIDmode, res, | |
9035 gen_rtx_AND (SImode, res, mask))); | |
9036 | |
9037 emit_insn (gen_rtx_SET (VOIDmode, res, | |
9038 gen_rtx_LSHIFTRT (SImode, res, off))); | |
9039 | |
9040 emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); | |
9041 } | |
9042 | |
9043 #include "gt-sparc.h" |