;; GCC machine description for CRIS atomic memory sequences.
;; Copyright (C) 2012-2017 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The CRIS atomic support yields code in three flavors, depending on
;; the CPU for which code is generated:
;;
;; - Plain old CRIS v0 (..v8)
;; - CRIS v10 (as used in ETRAX 100 LX)
;; - CRIS v32 (as used in ETRAX FS)
;;
;; The last two alternatives are similar, of LL/SC type.  They may
;; fail for other reasons; an exception, a cache miss or a bus request
;; from other parts of the system.  The difference between them is
;; just in what condition-codes are used to track LL and success or
;; failure for the store.  See the chapter on integral read-write
;; operations, chapter 1.13 in "ETRAX 100LX Programmers Manual",
;; <http://www.axis.com/files/tech_notes/etrax_100lx_prog_man-050519.pdf>
;; and chapter 2.1 in "ETRAX FS Designer's reference",
;; <http://www.axis.com/files/manuals/etrax_fs_des_ref-070821.pdf>.
;; Note that the datum being stored has to be contained fully within a
;; cache-line to be integral.  A failure to store the data integrally
;; will be flagged, but the store may still have happened in part,
;; which translates most usefully into the data having to be
;; "naturally aligned" to work.  Natural alignment is verified in the
;; generated code and will by default cause for unaligned pointers a
;; "break 8" to be executed or optionally a call to abort().  Beware
;; that options -m16bit and -m8bit may cause data to be unaligned
;; where it was otherwise aligned.  Data has a better chance of being
;; aligned if it is declared with e.g. __attribute__ ((__align__ (4))).
;;
;; The "plain old v0..v8 flavor" just assumes there's a single CPU in
;; the system, that no other parts of the system have access to memory
;; used for atomic accesses and since there's no user mode without
;; access to interrupt flags (another assumption), it just turns off
;; interrupts while doing the access.  Here, alignment is neither
;; required nor asserted.

;; Unspec codes used by the atomic patterns in this file:
;;  CRIS_UNSPEC_ATOMIC_OP        - wraps the memory operand of the atomic
;;                                 fetch-and-<op> insn.
;;  CRIS_UNSPEC_ATOMIC_SWAP_MEM  - the (conditional) memory update of the
;;                                 compare-and-swap insn.
;;  CRIS_UNSPEC_ATOMIC_SWAP_BOOL - the result set into operand 0 of the
;;                                 compare-and-swap insn.
(define_c_enum ""
 [
  CRIS_UNSPEC_ATOMIC_OP
  CRIS_UNSPEC_ATOMIC_SWAP_MEM
  CRIS_UNSPEC_ATOMIC_SWAP_BOOL
 ])

;; Bit number in $ccr that the interrupt-disabling sequences test with
;; "btstq" to find out whether interrupts were enabled on entry.
(define_constants [(CRIS_CCR_INTERRUPT_BIT 5)])

;; We use "mult" as a placeholder for "nand" (which does not have a
;; separate binary rtx operation) so we can use an iterator in the
;; define_expand and define_insn and avoid having a separate
;; mostly-identical copy.  You will see the "mult" operator in rtl
;; dumps, but it shouldn't matter as its use has one of its operands
;; inside an unspec_volatile.

(define_code_iterator atomic_op [plus minus ior and xor mult])

;; Maps each rtx code in atomic_op to the operation name used in the
;; pattern names ("atomic_fetch_<atomic_op_name><mode>"); "mult" stands
;; in for "nand".
(define_code_attr atomic_op_name
 [(plus "add") (minus "sub") (and "and") (ior "or") (xor "xor") (mult "nand")])

;; The operator nonatomic-operand can be memory, constant or register
;; for all but xor.  We can't use memory or addressing modes with
;; side-effects though, so just use registers and literal constants.
(define_code_attr atomic_op_op_cnstr
 [(plus "ri") (minus "ri") (and "ri") (ior "ri") (xor "r") (mult "ri")])

;; Predicate for the non-atomic operand (operand 2), matching the
;; constraints in atomic_op_op_cnstr: registers only for xor, registers
;; or constants for the other operations.
(define_code_attr atomic_op_op_pred
 [(plus "nonmemory_operand") (minus "nonmemory_operand")
  (and "nonmemory_operand") (ior "nonmemory_operand")
  (xor "register_operand") (mult "nonmemory_operand")])

;; Pairs of these are used to insert the "not" after the "and" for nand.
;; In the insn templates each entry is immediately followed by ",%3",
;; i.e. it supplies the mnemonic and the source operand (operand 2), with
;; the scratch register (operand 3) as destination.
(define_code_attr atomic_op_mnem_pre_op2 ;; Upper-case only to simplify testing.
 [(plus "%P2") (minus "Sub.d %2") (and "And%q2 %2") (ior "Or%q2 %2") (xor "Xor %2")
  (mult "aNd%q2 %2")])

;; Text emitted right after the operation on operand 3; empty for all
;; operations except nand ("mult"), where the "and" result is inverted.
(define_code_attr atomic_op_mnem_post_op3
 [(plus "") (minus "") (and "") (ior "") (xor "") (mult "not %3\;")])

;; For SImode, emit "q" for operands -31..31.
;; For HImode and QImode, emit the plain size suffix instead.
;; Used as the suffix of "cmp" on operand 3 in the compare-and-swap insn.
(define_mode_attr qm3 [(SI "%q3") (HI ".w") (QI ".b")])

;; Expander for atomic fetch-and-<op>.
;;  Operand 0: register that receives the value loaded from memory.
;;  Operand 1: the memory location operated on.
;;  Operand 2: the other operand of the operation.
;;  Operand 3: the memory model, as a constant integer.
;; Only enabled for QImode, or when atomics are not to be implemented as
;; library calls.
(define_expand "atomic_fetch_<atomic_op_name><mode>"
  [(match_operand:BWD 0 "register_operand")
   (match_operand:BWD 1 "memory_operand")
   (match_operand:BWD 2 "<atomic_op_op_pred>")
   (match_operand 3)
   (atomic_op:BWD (match_dup 0) (match_dup 1))]
  "<MODE>mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS"
{
  enum memmodel mmodel = (enum memmodel) INTVAL (operands[3]);

  /* A byte access cannot be misaligned, so only wider modes get the
     optional alignment trap.  */
  if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
    cris_emit_trap_for_misalignment (operands[1]);

  /* Fence before the operation if the memory model asks for one.  */
  if (need_atomic_barrier_p (mmodel, true))
    expand_mem_thread_fence (mmodel);

  emit_insn (gen_cris_atomic_fetch_<atomic_op_name><mode>_1 (operands[0],
							      operands[1],
							      operands[2]));

  /* Fence after the operation if the memory model asks for one.  */
  if (need_atomic_barrier_p (mmodel, false))
    expand_mem_thread_fence (mmodel);

  DONE;
})

;; The insn doing the actual atomic fetch-and-<op>.  Operand 0 receives
;; the contents of memory operand 1 as loaded by the sequence, and
;; operand 1 is updated with the result of the operation with operand 2.
;; Operand 3 is an SImode scratch register in which the new value is
;; computed.  Which of the three assembly sequences is emitted depends on
;; the CPU version; see the overview at the top of this file.
(define_insn "cris_atomic_fetch_<atomic_op_name><mode>_1"
  [(set (match_operand:BWD 1 "memory_operand" "+Q")
	(atomic_op:BWD
	 (unspec_volatile:BWD [(match_dup 1)] CRIS_UNSPEC_ATOMIC_OP)
	 ;; FIXME: improve constants more for plus, minus, and, ior.
	 ;; FIXME: handle memory operands without side-effects.
	 (match_operand:BWD 2 "<atomic_op_op_pred>" "<atomic_op_op_cnstr>")))
   (set (match_operand:BWD 0 "register_operand" "=&r")
	(match_dup 1))
   (clobber (match_scratch:SI 3 "=&r"))]
  "<MODE>mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS"
{
  /* Can't be too sure; better ICE if this happens.  */
  gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1]));

  if (TARGET_V32)
    /* v32 flavor: load, compute in the scratch register, store, and
       branch back to the load when "bcs" is taken.  */
    return
      "clearf p\n"
      ".Lsync.%=:\;"
      "move<m> %1,%0\;"
      "move.d %0,%3\;"
      "<atomic_op_mnem_pre_op2>,%3\;<atomic_op_mnem_post_op3>"
      "ax\;"
      "move<m> %3,%1\;"
      "bcs .Lsync.%=\;"
      "clearf p";
  else if (cris_cpu_version == 10)
    /* v10 flavor: same shape as for v32, but with plain "clearf" and
       "bwf" as the retry branch.  */
    return
      "clearf\n"
      ".Lsync.%=:\;"
      "move<m> %1,%0\;"
      "move.d %0,%3\;"
      "<atomic_op_mnem_pre_op2>,%3\;<atomic_op_mnem_post_op3>"
      "ax\;"
      "move<m> %3,%1\;"
      "bwf .Lsync.%=\;"
      "clearf";
  else
    {
      /* This one is for CRIS versions without load-locked-store-conditional
	 machinery; assume single-core-non-shared-memory without user
	 mode/supervisor mode distinction, and just disable interrupts
	 while performing the operation.
	 Rather than making this pattern more complex by freeing another
	 register or stack position to save condition codes (the value
	 of the interrupt-enabled bit), we check whether interrupts were
	 enabled before we disabled them and branch to a version
	 with/without afterwards re-enabling them.  */
      rtx ops[5];

      /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT.  */
      memcpy (ops, operands, sizeof(ops));
      ops[4] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);

      /* Save $ccr in the scratch register, disable interrupts, load the
	 old value and test the saved interrupt bit.  This part is output
	 separately because it needs the extra operand 4.  */
      output_asm_insn ("move $ccr,%3\;"
		       "di\;"
		       "move<m> %1,%0\;"
		       "btstq %4,%3",
		       ops);

      /* The "move.d %0,%3" after the "bmi" is placed so that it is
	 executed for both paths; the first path then finishes without
	 "ei", the second (.Lsync.irqon) ends with "ei".  */
      return
	"bmi .Lsync.irqon.%=\;"
	"move.d %0,%3\;"

	"<atomic_op_mnem_pre_op2>,%3\;<atomic_op_mnem_post_op3>"
	"ba .Lsync.irqoff.%=\;"
	"move<m> %3,%1\n"

	".Lsync.irqon.%=:\;"
	"<atomic_op_mnem_pre_op2>,%3\;<atomic_op_mnem_post_op3>"
	"move<m> %3,%1\;"
	"ei\n"
	".Lsync.irqoff.%=:";
    }
})

;; This pattern is more-or-less assumed to always exist if any of the
;; other atomic patterns exist (see e.g. comment at the
;; can_compare_and_swap_p call in omp-low.c, 4.8 era).  We'd slightly
;; prefer atomic_exchange<mode> over this, but having both would be
;; redundant.
;; FIXME: handle memory without side-effects for operand[3].
;;
;;  Operand 0: SImode register receiving the result of the comparison.
;;  Operand 1: register receiving the value loaded from memory.
;;  Operand 2: the memory location.
;;  Operand 3: the value compared against the memory contents.
;;  Operand 4: the value stored when the comparison succeeds.
;;  Operand 6: the memory model used for the fences emitted here.
;;  Operands 5 and 7 are not used by this expander (presumably the
;;  weak flag and the failure memory model of the standard pattern;
;;  confirm against the GCC internals manual).
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "register_operand")
   (match_operand:BWD 1 "register_operand")
   (match_operand:BWD 2 "memory_operand")
   (match_operand:BWD 3 "nonmemory_operand")
   (match_operand:BWD 4 "register_operand")
   (match_operand 5)
   (match_operand 6)
   (match_operand 7)]
  "<MODE>mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS"
{
  enum memmodel mmodel = (enum memmodel) INTVAL (operands[6]);

  /* A byte access cannot be misaligned, so only wider modes get the
     optional alignment trap.  */
  if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
    cris_emit_trap_for_misalignment (operands[2]);

  /* Fence before the operation if the memory model asks for one.  */
  if (need_atomic_barrier_p (mmodel, true))
    expand_mem_thread_fence (mmodel);

  emit_insn (gen_cris_atomic_compare_and_swap<mode>_1 (operands[0],
						       operands[1],
						       operands[2],
						       operands[3],
						       operands[4]));

  /* Fence after the operation if the memory model asks for one.  */
  if (need_atomic_barrier_p (mmodel, false))
    expand_mem_thread_fence (mmodel);

  DONE;
})

;; The insn doing the actual compare-and-swap.  Operand 1 receives the
;; contents of memory operand 2; that value is compared with operand 3
;; and, if equal, operand 4 is stored to operand 2.  Operand 0 is set by
;; "seq" from the comparison.  Which of the three assembly sequences is
;; emitted depends on the CPU version; see the overview at the top of
;; this file.
(define_insn "cris_atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SI 0 "register_operand" "=&r")
	(unspec_volatile:SI
	 [(match_operand:BWD 2 "memory_operand" "+Q")
	  (match_operand:BWD 3 "nonmemory_operand" "ri")]
	 CRIS_UNSPEC_ATOMIC_SWAP_BOOL))
   (set (match_operand:BWD 1 "register_operand" "=&r") (match_dup 2))
   (set (match_dup 2)
	(unspec_volatile:BWD
	 [(match_dup 2)
	  (match_dup 3)
	  (match_operand:BWD 4 "register_operand" "r")]
	 CRIS_UNSPEC_ATOMIC_SWAP_MEM))]
  "<MODE>mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS"
{
  if (TARGET_V32)
    /* v32 flavor: load and compare; on mismatch skip the store, else
       store and branch back to the load when "bcs" is taken.  */
    return
      "\n.Lsync.repeat.%=:\;"
      "clearf p\;"
      "move<m> %2,%1\;"
      "cmp<qm3> %3,%1\;"
      "bne .Lsync.after.%=\;"
      "ax\;"

      "move<m> %4,%2\;"
      "bcs .Lsync.repeat.%=\n"
      ".Lsync.after.%=:\;"
      "seq %0";
  else if (cris_cpu_version == 10)
    /* v10 flavor: same shape as for v32, but with plain "clearf" and
       "bwf" as the retry branch.  */
    return
      "\n.Lsync.repeat.%=:\;"
      "clearf\;"
      "move<m> %2,%1\;"
      "cmp<qm3> %3,%1\;"
      "bne .Lsync.after.%=\;"
      "ax\;"

      "move<m> %4,%2\;"
      "bwf .Lsync.repeat.%=\n"
      ".Lsync.after.%=:\;"
      "seq %0";
  else
    {
      /* This one is for CRIS versions without load-locked-store-conditional
	 machinery; assume single-core-non-shared-memory without user
	 mode/supervisor mode distinction, and just disable interrupts
	 while performing the operation.
	 Rather than making this pattern more complex by freeing another
	 register or stack position to save condition codes (the value
	 of the interrupt-enabled bit), we check whether interrupts were
	 enabled before we disabled them and branch to a version
	 with/without afterwards re-enabling them.  */
      rtx ops[4];

      /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT.  */
      memcpy (ops, operands, sizeof(ops));
      ops[3] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);

      /* Save $ccr in operand 0 (not yet needed for the result), disable
	 interrupts, load the old value and test the saved interrupt bit.
	 This part is output separately because operand 3 is replaced by
	 the bit number here.  */
      output_asm_insn ("move $ccr,%0\;"
		       "di\;"
		       "move<m> %2,%1\;"
		       "btstq %3,%0",
		       ops);

      /* In the .Lsync.irqon path, interrupts were enabled on entry and
	 must be re-enabled on exit whether or not the comparison
	 succeeded.  The mismatch branch therefore targets
	 .Lsync.irqrestore (just before "ei") rather than .Lsync.after,
	 which would skip "ei" and leave interrupts disabled.  */
      return
	"bmi .Lsync.irqon.%=\;"
	"nop\;"

	"cmp<qm3> %3,%1\;"
	"bne .Lsync.after.%=\;"
	"seq %0\;"
	"ba .Lsync.after.%=\;"
	"move<m> %4,%2\n"

	".Lsync.irqon.%=:\;"
	"cmp<qm3> %3,%1\;"
	"bne .Lsync.irqrestore.%=\;"
	"seq %0\;"
	"move<m> %4,%2\n"
	".Lsync.irqrestore.%=:\;"
	"ei\n"
	".Lsync.after.%=:";
    }
})