/* -----------------------------------------------------------------------
   tile.S - Copyright (c) 2011 Tilera Corp.

   Tilera TILEPro and TILE-Gx Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

/* Size of one register, in bytes.  */
#define REG_SIZE FFI_SIZEOF_ARG

/* Size, in bytes, of the stack linkage area used for backtracing.

   ABI reminder: when a procedure is entered, sp addresses the stack
   slot into which the procedure must spill its return address unless
   it is a leaf.  The slot REG_SIZE bytes above that one belongs to the
   caller and records the sp value the caller itself was entered with
   (in other words, the caller's frame pointer).  */
#define LINKAGE_SIZE (2 * REG_SIZE)

/* Arguments and return values travel in the first 10 registers.  */
#define NUM_ARG_REGS 10

/* Store, load and branch-if-greater-than-zero mnemonics, which are
   spelled differently on TILE-Gx and on TILEPro.  */
#if defined(__tilegx__)
#define SW st
#define LW ld
#define BGZT bgtzt
#else
#define SW sw
#define LW lw
#define BGZT bgzt
#endif

/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
                       const int_reg_t *stack_args,
                       unsigned long stack_args_bytes,
                       void (*fnaddr)(void));

   Calls FNADDR with r0-r9 loaded from REG_ARGS and with
   STACK_ARGS_BYTES bytes copied from STACK_ARGS into the outgoing
   stack argument area.  If STACK_ARGS_BYTES is zero, then STACK_ARGS
   is ignored.

   When the invoked function returns, the values of r0-r9 are
   blindly stored back into REG_ARGS for the caller to examine.

   Frame layout (E is sp on entry, kept in r52 between prologue and
   epilogue; S is sp after the frame has been pushed):

     E                     saved lr
     E - REG_SIZE          saved incoming r52
     E - 2 * REG_SIZE      saved REG_ARGS pointer
     S + LINKAGE_SIZE ...  outgoing stack arguments
     S + REG_SIZE          E (linkage slot holding the incoming sp)
     S                     slot where the callee may spill its lr

   The frame size is STACK_ARGS_BYTES + 2 * REG_SIZE + LINKAGE_SIZE,
   rounded up to a multiple of 8.  r10-r16 are used as scratch.  */

	.section .text.ffi_call_tile, "ax", @progbits
	.align  8
	.globl  ffi_call_tile
	FFI_HIDDEN(ffi_call_tile)
ffi_call_tile:

/* Incoming arguments. */
#define REG_ARGS                r0
#define INCOMING_STACK_ARGS     r1
#define STACK_ARG_BYTES         r2
#define ORIG_FNADDR             r3

/* Temporary values. */
#define FRAME_SIZE              r10
#define TMP                     r11
#define TMP2                    r12
#define OUTGOING_STACK_ARGS     r13
#define REG_ADDR_PTR            r14
#define RETURN_REG_ADDR         r15
#define FNADDR                  r16

	.cfi_startproc
	{
	/* Save return address. */
	SW      sp, lr
	.cfi_offset lr, 0
	/* Prepare to spill incoming r52. */
	addi    TMP, sp, -REG_SIZE
	/* Increase frame size to have room to spill r52 and REG_ARGS.
	   The +7 is to round up mod 8. */
	addi    FRAME_SIZE, STACK_ARG_BYTES, \
		REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
	}
	{
	/* Round stack frame size to a multiple of 8 to satisfy ABI. */
	andi    FRAME_SIZE, FRAME_SIZE, -8
	/* Compute where to spill REG_ARGS value. */
	addi    TMP2, sp, -(REG_SIZE * 2)
	}
	{
	/* Spill incoming r52. */
	SW      TMP, r52
	.cfi_offset r52, -REG_SIZE
	/* Set up our frame pointer. */
	move    r52, sp
	.cfi_def_cfa_register r52
	/* Push stack frame. */
	sub     sp, sp, FRAME_SIZE
	}
	{
	/* Prepare to set up stack linkage. */
	addi    TMP, sp, REG_SIZE
	/* Prepare to memcpy stack args. */
	addi    OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
	/* Save REG_ARGS which we will need after we call the subroutine. */
	SW      TMP2, REG_ARGS
	}
	{
	/* Set up linkage info to hold incoming stack pointer. */
	SW      TMP, r52
	}
	{
	/* Skip stack args memcpy if we don't have any stack args (common). */
	blezt   STACK_ARG_BYTES, .Ldone_stack_args_memcpy
	}

	/* Copy one register-sized word per iteration; STACK_ARG_BYTES
	   counts the bytes still to be copied.  */
.Lmemcpy_stack_args:
	{
	/* Load incoming argument from stack_args. */
	LW      TMP, INCOMING_STACK_ARGS
	addi    INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
	}
	{
	/* Store stack argument into outgoing stack argument area. */
	SW      OUTGOING_STACK_ARGS, TMP
	addi    OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
	addi    STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
	}
	{
	BGZT    STACK_ARG_BYTES, .Lmemcpy_stack_args
	}
.Ldone_stack_args_memcpy:

	{
	/* Copy aside ORIG_FNADDR so we can overwrite its register. */
	move    FNADDR, ORIG_FNADDR
	/* Prepare to load argument registers. */
	addi    REG_ADDR_PTR, r0, REG_SIZE
	/* Load outgoing r0. */
	LW      r0, r0
	}

/* Load up argument registers from the REG_ARGS array: one bundle that
   loads REG from the address in PTR and advances PTR by one register.
   Also used by ffi_closure_tile.  */
#define LOAD_REG(REG, PTR) \
	{ \
	LW      REG, PTR ; \
	addi    PTR, PTR, REG_SIZE \
	}

	LOAD_REG(r1, REG_ADDR_PTR)
	LOAD_REG(r2, REG_ADDR_PTR)
	LOAD_REG(r3, REG_ADDR_PTR)
	LOAD_REG(r4, REG_ADDR_PTR)
	LOAD_REG(r5, REG_ADDR_PTR)
	LOAD_REG(r6, REG_ADDR_PTR)
	LOAD_REG(r7, REG_ADDR_PTR)
	LOAD_REG(r8, REG_ADDR_PTR)
	LOAD_REG(r9, REG_ADDR_PTR)

	{
	/* Call the subroutine. */
	jalr    FNADDR
	}

	{
	/* Restore original lr. */
	LW      lr, r52
	/* Prepare to recover ARGS, which we spilled earlier. */
	addi    TMP, r52, -(2 * REG_SIZE)
	}
	{
	/* Restore ARGS, so we can fill it in with the return regs r0-r9. */
	LW      RETURN_REG_ADDR, TMP
	/* Prepare to restore original r52. */
	addi    TMP, r52, -REG_SIZE
	}

	{
	/* Pop stack frame. */
	move    sp, r52
	/* Restore original r52. */
	LW      r52, TMP
	}
	/* The frame is gone: sp is the CFA again, and r52 and lr hold
	   their entry values.  Say so, otherwise the unwind info would
	   keep deriving the CFA from the just-restored r52 while the
	   register stores below execute.  */
	.cfi_def_cfa_register sp
	.cfi_restore r52
	.cfi_restore lr

/* One bundle that stores REG at the address in PTR and advances PTR
   by one register.  Also used by ffi_closure_tile.  */
#define STORE_REG(REG, PTR) \
	{ \
	SW      PTR, REG ; \
	addi    PTR, PTR, REG_SIZE \
	}

	/* Return all register values by reference. */
	STORE_REG(r0, RETURN_REG_ADDR)
	STORE_REG(r1, RETURN_REG_ADDR)
	STORE_REG(r2, RETURN_REG_ADDR)
	STORE_REG(r3, RETURN_REG_ADDR)
	STORE_REG(r4, RETURN_REG_ADDR)
	STORE_REG(r5, RETURN_REG_ADDR)
	STORE_REG(r6, RETURN_REG_ADDR)
	STORE_REG(r7, RETURN_REG_ADDR)
	STORE_REG(r8, RETURN_REG_ADDR)
	STORE_REG(r9, RETURN_REG_ADDR)

	{
	jrp     lr
	}

	.cfi_endproc
	.size ffi_call_tile, .-ffi_call_tile

/* ffi_closure_tile(...)

   Entry point reached through a closure trampoline.

   State on entry:
     TILE-Gx:  r11 holds the address of the closure and lr holds the
               actual return address.
     TILEPro:  lr points to the closure plus 8 bytes (the closure
               starts with a "jal ffi_closure_tile") and r10 holds the
               actual return address.

   The routine spills r0-r9 into an array on the stack, hands the
   closure, that array and the address of the incoming stack arguments
   to ffi_closure_tile_inner (C code that does the real work), then
   reloads r0-r9 from the slots that follow the spilled registers and
   returns.  */

/* Frame: the linkage area, NUM_ARG_REGS slots for the spilled incoming
   registers, and NUM_ARG_REGS more slots from which r0-r9 are reloaded
   before returning; rounded up to a multiple of 8.  */
#define CLOSURE_FRAME_SIZE (((LINKAGE_SIZE + 2 * NUM_ARG_REGS * REG_SIZE) + 7) & -8)

	.section .text.ffi_closure_tile, "ax", @progbits
	.align  8
	.globl  ffi_closure_tile
	FFI_HIDDEN(ffi_closure_tile)
ffi_closure_tile:
	.cfi_startproc
	{
#if defined(__tilegx__)
	/* The return address is still in lr; spill it at the incoming sp. */
	SW      sp, lr
	.cfi_offset lr, 0
#else
	/* The return address arrives in r10 (the closure stub moved it
	   there); spill it at the incoming sp. */
	SW      sp, r10
	.cfi_return_column r10
	.cfi_offset r10, 0
#endif
	/* r10 = address of the linkage slot of the frame about to be pushed. */
	addli   r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
	}
	{
	/* Record the incoming stack pointer in that linkage slot. */
	SW      r10, sp
	.cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
	/* Allocate the frame. */
	addli   sp, sp, -CLOSURE_FRAME_SIZE
	.cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
	}

	{
	/* r10 = first slot of the register spill array. */
	addi    r10, sp, LINKAGE_SIZE
	}

	/* Dump the incoming argument registers, r0 first. */
	STORE_REG(r0, r10)
	STORE_REG(r1, r10)
	STORE_REG(r2, r10)
	STORE_REG(r3, r10)
	STORE_REG(r4, r10)
	STORE_REG(r5, r10)
	STORE_REG(r6, r10)
	STORE_REG(r7, r10)
	STORE_REG(r8, r10)
	{
	/* r9 is the last one; r10 need not advance any further. */
	SW      r10, r9
#if defined(__tilegx__)
	/* First C argument: the closure, handed to us in r11. */
	move    r0, r11
#else
	/* First C argument: the closure.  lr is a phony return address
	   pointing 8 bytes into the closure (just past its leading
	   "jal ffi_closure_tile"), so subtract 8. */
	addi    r0, lr, -8
#endif
	/* Second C argument: the register array spilled above. */
	addi    r1, sp, LINKAGE_SIZE
	}
	{
	/* Third C argument: the extra stack arguments (if any). */
	addli   r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
	/* Let C code handle everything else. */
	jal     ffi_closure_tile_inner
	}
	{
	/* r10 = address of the slot holding the saved return address. */
	addli   r10, sp, CLOSURE_FRAME_SIZE
	}
	{
	/* Reload the return address into lr. */
	LW      lr, r10
	/* r10 = first slot after the spilled incoming registers. */
	addli   r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
	}
	/* Hand back every register value, which C code may have set. */
	LOAD_REG(r0, r10)
	LOAD_REG(r1, r10)
	LOAD_REG(r2, r10)
	LOAD_REG(r3, r10)
	LOAD_REG(r4, r10)
	LOAD_REG(r5, r10)
	LOAD_REG(r6, r10)
	LOAD_REG(r7, r10)
	LOAD_REG(r8, r10)
	LOAD_REG(r9, r10)
	{
	/* Release the frame and return. */
	addli   sp, sp, CLOSURE_FRAME_SIZE
	jrp     lr
	}

	.cfi_endproc
	.size ffi_closure_tile, . - ffi_closure_tile

/* Template instructions for the closure trampoline.
   ffi_prep_closure_loc copies them into each closure and replaces the
   zeroed operands with their proper values at runtime.  Keep the
   instructions and their order within each bundle exactly as they are.  */

	.section .text.ffi_template_tramp_tile, "ax", @progbits
	.align  8
	.globl  ffi_template_tramp_tile
	FFI_HIDDEN(ffi_template_tramp_tile)
ffi_template_tramp_tile:
#if defined(__tilegx__)
	{
	moveli  r11, 0  /* backpatched to address of containing closure. */
	moveli  r10, 0  /* backpatched to ffi_closure_tile. */
	}
	/* Note: the following bundle gets generated multiple times
	   depending on the pointer value (esp. useful for -m32 mode). */
	{
	shl16insli r11, r11, 0
	shl16insli r10, r10, 0
	}
	{
	info    2+8     /* for backtracer: -> pc in lr, frame size 0 */
	jr      r10
	}
#else
	/* 'jal .' yields a PC-relative offset of zero so we can OR in the
	   right offset at runtime. */
	{
	move    r10, lr
	jal     .       /* ffi_closure_tile */
	}
#endif

	.size ffi_template_tramp_tile, . - ffi_template_tramp_tile