Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/i386/sync.md @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
0 | 1 ;; GCC machine description for i386 synchronization instructions. |
145 | 2 ;; Copyright (C) 2005-2020 Free Software Foundation, Inc. |
0 | 3 ;; |
4 ;; This file is part of GCC. | |
5 ;; | |
6 ;; GCC is free software; you can redistribute it and/or modify | |
7 ;; it under the terms of the GNU General Public License as published by | |
8 ;; the Free Software Foundation; either version 3, or (at your option) | |
9 ;; any later version. | |
10 ;; | |
11 ;; GCC is distributed in the hope that it will be useful, | |
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 ;; GNU General Public License for more details. | |
15 ;; | |
16 ;; You should have received a copy of the GNU General Public License | |
17 ;; along with GCC; see the file COPYING3. If not see | |
18 ;; <http://www.gnu.org/licenses/>. | |
19 | |
111 | 20 (define_c_enum "unspec" [ |
21 UNSPEC_LFENCE | |
22 UNSPEC_SFENCE | |
23 UNSPEC_MFENCE | |
24 | |
25 UNSPEC_FILD_ATOMIC | |
26 UNSPEC_FIST_ATOMIC | |
27 | |
28 UNSPEC_LDX_ATOMIC | |
29 UNSPEC_STX_ATOMIC | |
0 | 30 |
111 | 31 ;; __atomic support |
32 UNSPEC_LDA | |
33 UNSPEC_STA | |
34 ]) | |
35 | |
36 (define_c_enum "unspecv" [ | |
37 UNSPECV_CMPXCHG | |
38 UNSPECV_XCHG | |
39 UNSPECV_LOCK | |
40 ]) | |
41 | |
42 (define_expand "sse2_lfence" | |
0 | 43 [(set (match_dup 0) |
111 | 44 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] |
45 "TARGET_SSE2" | |
0 | 46 { |
47 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
48 MEM_VOLATILE_P (operands[0]) = 1; | |
111 | 49 }) |
0 | 50 |
111 | 51 (define_insn "*sse2_lfence" |
52 [(set (match_operand:BLK 0) | |
53 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] | |
54 "TARGET_SSE2" | |
55 "lfence" | |
56 [(set_attr "type" "sse") | |
57 (set_attr "length_address" "0") | |
58 (set_attr "atom_sse_attr" "lfence") | |
59 (set_attr "memory" "unknown")]) | |
60 | |
61 (define_expand "sse_sfence" | |
62 [(set (match_dup 0) | |
63 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] | |
64 "TARGET_SSE || TARGET_3DNOW_A" | |
65 { | |
66 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
67 MEM_VOLATILE_P (operands[0]) = 1; | |
0 | 68 }) |
69 | |
111 | 70 (define_insn "*sse_sfence" |
71 [(set (match_operand:BLK 0) | |
72 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] | |
73 "TARGET_SSE || TARGET_3DNOW_A" | |
74 "sfence" | |
75 [(set_attr "type" "sse") | |
76 (set_attr "length_address" "0") | |
77 (set_attr "atom_sse_attr" "fence") | |
78 (set_attr "memory" "unknown")]) | |
79 | |
80 (define_expand "sse2_mfence" | |
81 [(set (match_dup 0) | |
82 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] | |
83 "TARGET_SSE2" | |
84 { | |
85 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
86 MEM_VOLATILE_P (operands[0]) = 1; | |
87 }) | |
88 | |
89 (define_insn "mfence_sse2" | |
90 [(set (match_operand:BLK 0) | |
91 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] | |
92 "TARGET_64BIT || TARGET_SSE2" | |
93 "mfence" | |
94 [(set_attr "type" "sse") | |
95 (set_attr "length_address" "0") | |
96 (set_attr "atom_sse_attr" "fence") | |
97 (set_attr "memory" "unknown")]) | |
98 | |
99 (define_insn "mfence_nosse" | |
100 [(set (match_operand:BLK 0) | |
0 | 101 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE)) |
102 (clobber (reg:CC FLAGS_REG))] | |
103 "!(TARGET_64BIT || TARGET_SSE2)" | |
63 | 104 "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" |
0 | 105 [(set_attr "memory" "unknown")]) |
106 | |
111 | 107 (define_expand "mem_thread_fence" |
108 [(match_operand:SI 0 "const_int_operand")] ;; model | |
109 "" | |
110 { | |
111 enum memmodel model = memmodel_from_int (INTVAL (operands[0])); | |
112 | |
113 /* Unless this is a SEQ_CST fence, the i386 memory model is strong | |
114 enough not to require barriers of any kind. */ | |
115 if (is_mm_seq_cst (model)) | |
116 { | |
117 rtx (*mfence_insn)(rtx); | |
118 rtx mem; | |
119 | |
120 if (TARGET_64BIT || TARGET_SSE2) | |
121 mfence_insn = gen_mfence_sse2; | |
122 else | |
123 mfence_insn = gen_mfence_nosse; | |
124 | |
125 mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); | |
126 MEM_VOLATILE_P (mem) = 1; | |
127 | |
128 emit_insn (mfence_insn (mem)); | |
129 } | |
130 DONE; | |
131 }) | |
132 | |
133 ;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations, | |
134 ;; Only beginning at Pentium family processors do we get any guarantee of | |
135 ;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a | |
136 ;; guarantee for 64-bit accesses that do not cross a cacheline boundary. | |
137 ;; | |
138 ;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium". | |
139 ;; | |
140 ;; Importantly, *no* processor makes atomicity guarantees for larger | |
141 ;; accesses. In particular, there's no way to perform an atomic TImode | |
142 ;; move, despite the apparent applicability of MOVDQA et al. | |
143 | |
144 (define_mode_iterator ATOMIC | |
145 [QI HI SI | |
146 (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))") | |
147 ]) | |
148 | |
149 (define_expand "atomic_load<mode>" | |
150 [(set (match_operand:ATOMIC 0 "nonimmediate_operand") | |
151 (unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand") | |
152 (match_operand:SI 2 "const_int_operand")] | |
153 UNSPEC_LDA))] | |
154 "" | |
155 { | |
156 /* For DImode on 32-bit, we can use the FPU to perform the load. */ | |
157 if (<MODE>mode == DImode && !TARGET_64BIT) | |
158 emit_insn (gen_atomic_loaddi_fpu | |
159 (operands[0], operands[1], | |
160 assign_386_stack_local (DImode, SLOT_TEMP))); | |
161 else | |
162 { | |
163 rtx dst = operands[0]; | |
164 | |
165 if (MEM_P (dst)) | |
166 dst = gen_reg_rtx (<MODE>mode); | |
167 | |
168 emit_move_insn (dst, operands[1]); | |
0 | 169 |
111 | 170 /* Fix up the destination if needed. */ |
171 if (dst != operands[0]) | |
172 emit_move_insn (operands[0], dst); | |
173 } | |
174 DONE; | |
175 }) | |
176 | |
177 (define_insn_and_split "atomic_loaddi_fpu" | |
178 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r") | |
179 (unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")] | |
180 UNSPEC_LDA)) | |
181 (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) | |
182 (clobber (match_scratch:DF 3 "=X,xf,xf"))] | |
183 "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" | |
184 "#" | |
185 "&& reload_completed" | |
186 [(const_int 0)] | |
187 { | |
188 rtx dst = operands[0], src = operands[1]; | |
189 rtx mem = operands[2], tmp = operands[3]; | |
190 | |
191 if (SSE_REG_P (dst)) | |
192 emit_move_insn (dst, src); | |
193 else | |
194 { | |
195 if (MEM_P (dst)) | |
196 mem = dst; | |
197 | |
198 if (STACK_REG_P (tmp)) | |
199 { | |
200 emit_insn (gen_loaddi_via_fpu (tmp, src)); | |
201 emit_insn (gen_storedi_via_fpu (mem, tmp)); | |
202 } | |
203 else | |
204 { | |
205 emit_insn (gen_loaddi_via_sse (tmp, src)); | |
206 emit_insn (gen_storedi_via_sse (mem, tmp)); | |
207 } | |
208 | |
209 if (mem != dst) | |
210 emit_move_insn (dst, mem); | |
211 } | |
212 DONE; | |
213 }) | |
214 | |
215 (define_peephole2 | |
216 [(set (match_operand:DF 0 "fp_register_operand") | |
217 (unspec:DF [(match_operand:DI 1 "memory_operand")] | |
218 UNSPEC_FILD_ATOMIC)) | |
219 (set (match_operand:DI 2 "memory_operand") | |
220 (unspec:DI [(match_dup 0)] | |
221 UNSPEC_FIST_ATOMIC)) | |
222 (set (match_operand:DF 3 "any_fp_register_operand") | |
223 (match_operand:DF 4 "memory_operand"))] | |
224 "!TARGET_64BIT | |
225 && peep2_reg_dead_p (2, operands[0]) | |
226 && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" | |
227 [(set (match_dup 3) (match_dup 5))] | |
228 "operands[5] = gen_lowpart (DFmode, operands[1]);") | |
229 | |
230 (define_peephole2 | |
231 [(set (match_operand:DF 0 "fp_register_operand") | |
232 (unspec:DF [(match_operand:DI 1 "memory_operand")] | |
233 UNSPEC_FILD_ATOMIC)) | |
234 (set (match_operand:DI 2 "memory_operand") | |
235 (unspec:DI [(match_dup 0)] | |
236 UNSPEC_FIST_ATOMIC)) | |
237 (set (mem:BLK (scratch:SI)) | |
238 (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) | |
239 (set (match_operand:DF 3 "any_fp_register_operand") | |
240 (match_operand:DF 4 "memory_operand"))] | |
241 "!TARGET_64BIT | |
242 && peep2_reg_dead_p (2, operands[0]) | |
243 && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" | |
244 [(const_int 0)] | |
0 | 245 { |
111 | 246 emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); |
247 emit_insn (gen_memory_blockage ()); | |
248 DONE; | |
249 }) | |
250 | |
251 (define_peephole2 | |
252 [(set (match_operand:DF 0 "sse_reg_operand") | |
253 (unspec:DF [(match_operand:DI 1 "memory_operand")] | |
254 UNSPEC_LDX_ATOMIC)) | |
255 (set (match_operand:DI 2 "memory_operand") | |
256 (unspec:DI [(match_dup 0)] | |
257 UNSPEC_STX_ATOMIC)) | |
258 (set (match_operand:DF 3 "any_fp_register_operand") | |
259 (match_operand:DF 4 "memory_operand"))] | |
260 "!TARGET_64BIT | |
261 && peep2_reg_dead_p (2, operands[0]) | |
262 && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" | |
263 [(set (match_dup 3) (match_dup 5))] | |
264 "operands[5] = gen_lowpart (DFmode, operands[1]);") | |
265 | |
266 (define_peephole2 | |
267 [(set (match_operand:DF 0 "sse_reg_operand") | |
268 (unspec:DF [(match_operand:DI 1 "memory_operand")] | |
269 UNSPEC_LDX_ATOMIC)) | |
270 (set (match_operand:DI 2 "memory_operand") | |
271 (unspec:DI [(match_dup 0)] | |
272 UNSPEC_STX_ATOMIC)) | |
273 (set (mem:BLK (scratch:SI)) | |
274 (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) | |
275 (set (match_operand:DF 3 "any_fp_register_operand") | |
276 (match_operand:DF 4 "memory_operand"))] | |
277 "!TARGET_64BIT | |
278 && peep2_reg_dead_p (2, operands[0]) | |
279 && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" | |
280 [(const_int 0)] | |
281 { | |
282 emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); | |
283 emit_insn (gen_memory_blockage ()); | |
284 DONE; | |
285 }) | |
286 | |
287 (define_expand "atomic_store<mode>" | |
288 [(set (match_operand:ATOMIC 0 "memory_operand") | |
289 (unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand") | |
290 (match_operand:SI 2 "const_int_operand")] | |
291 UNSPEC_STA))] | |
292 "" | |
293 { | |
294 enum memmodel model = memmodel_from_int (INTVAL (operands[2])); | |
295 | |
296 if (<MODE>mode == DImode && !TARGET_64BIT) | |
0 | 297 { |
111 | 298 /* For DImode on 32-bit, we can use the FPU to perform the store. */ |
299 /* Note that while we could perform a cmpxchg8b loop, that turns | |
300 out to be significantly larger than this plus a barrier. */ | |
301 emit_insn (gen_atomic_storedi_fpu | |
302 (operands[0], operands[1], | |
303 assign_386_stack_local (DImode, SLOT_TEMP))); | |
304 } | |
305 else | |
306 { | |
307 operands[1] = force_reg (<MODE>mode, operands[1]); | |
308 | |
145 | 309 /* For seq-cst stores, use XCHG when we lack MFENCE |
310 or when target prefers XCHG. */ | |
311 if (is_mm_seq_cst (model) | |
312 && (!(TARGET_64BIT || TARGET_SSE2) | |
313 || TARGET_USE_XCHG_FOR_ATOMIC_STORE)) | |
0 | 314 { |
111 | 315 emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode), |
316 operands[0], operands[1], | |
317 operands[2])); | |
318 DONE; | |
319 } | |
320 | |
321 /* Otherwise use a store. */ | |
322 emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1], | |
323 operands[2])); | |
324 } | |
325 /* ... followed by an MFENCE, if required. */ | |
326 if (is_mm_seq_cst (model)) | |
327 emit_insn (gen_mem_thread_fence (operands[2])); | |
328 DONE; | |
329 }) | |
330 | |
331 (define_insn "atomic_store<mode>_1" | |
332 [(set (match_operand:SWI 0 "memory_operand" "=m") | |
333 (unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>") | |
334 (match_operand:SI 2 "const_int_operand")] | |
335 UNSPEC_STA))] | |
336 "" | |
337 "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}") | |
338 | |
339 (define_insn_and_split "atomic_storedi_fpu" | |
340 [(set (match_operand:DI 0 "memory_operand" "=m,m,m") | |
341 (unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")] | |
342 UNSPEC_STA)) | |
343 (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) | |
344 (clobber (match_scratch:DF 3 "=X,xf,xf"))] | |
345 "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" | |
346 "#" | |
347 "&& reload_completed" | |
348 [(const_int 0)] | |
349 { | |
350 rtx dst = operands[0], src = operands[1]; | |
351 rtx mem = operands[2], tmp = operands[3]; | |
352 | |
353 if (SSE_REG_P (src)) | |
354 emit_move_insn (dst, src); | |
355 else | |
356 { | |
357 if (REG_P (src)) | |
358 { | |
359 emit_move_insn (mem, src); | |
360 src = mem; | |
0 | 361 } |
111 | 362 |
363 if (STACK_REG_P (tmp)) | |
364 { | |
365 emit_insn (gen_loaddi_via_fpu (tmp, src)); | |
366 emit_insn (gen_storedi_via_fpu (dst, tmp)); | |
367 } | |
0 | 368 else |
111 | 369 { |
370 emit_insn (gen_loaddi_via_sse (tmp, src)); | |
371 emit_insn (gen_storedi_via_sse (dst, tmp)); | |
372 } | |
0 | 373 } |
111 | 374 DONE; |
375 }) | |
376 | |
377 (define_peephole2 | |
378 [(set (match_operand:DF 0 "memory_operand") | |
379 (match_operand:DF 1 "any_fp_register_operand")) | |
380 (set (match_operand:DF 2 "fp_register_operand") | |
381 (unspec:DF [(match_operand:DI 3 "memory_operand")] | |
382 UNSPEC_FILD_ATOMIC)) | |
383 (set (match_operand:DI 4 "memory_operand") | |
384 (unspec:DI [(match_dup 2)] | |
385 UNSPEC_FIST_ATOMIC))] | |
386 "!TARGET_64BIT | |
387 && peep2_reg_dead_p (3, operands[2]) | |
388 && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" | |
389 [(set (match_dup 5) (match_dup 1))] | |
390 "operands[5] = gen_lowpart (DFmode, operands[4]);") | |
391 | |
392 (define_peephole2 | |
393 [(set (match_operand:DF 0 "memory_operand") | |
394 (match_operand:DF 1 "any_fp_register_operand")) | |
395 (set (mem:BLK (scratch:SI)) | |
396 (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) | |
397 (set (match_operand:DF 2 "fp_register_operand") | |
398 (unspec:DF [(match_operand:DI 3 "memory_operand")] | |
399 UNSPEC_FILD_ATOMIC)) | |
400 (set (match_operand:DI 4 "memory_operand") | |
401 (unspec:DI [(match_dup 2)] | |
402 UNSPEC_FIST_ATOMIC))] | |
403 "!TARGET_64BIT | |
404 && peep2_reg_dead_p (4, operands[2]) | |
405 && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" | |
406 [(const_int 0)] | |
407 { | |
408 emit_insn (gen_memory_blockage ()); | |
409 emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]); | |
410 DONE; | |
0 | 411 }) |
412 | |
111 | 413 (define_peephole2 |
414 [(set (match_operand:DF 0 "memory_operand") | |
415 (match_operand:DF 1 "any_fp_register_operand")) | |
416 (set (match_operand:DF 2 "sse_reg_operand") | |
417 (unspec:DF [(match_operand:DI 3 "memory_operand")] | |
418 UNSPEC_LDX_ATOMIC)) | |
419 (set (match_operand:DI 4 "memory_operand") | |
420 (unspec:DI [(match_dup 2)] | |
421 UNSPEC_STX_ATOMIC))] | |
422 "!TARGET_64BIT | |
423 && peep2_reg_dead_p (3, operands[2]) | |
424 && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" | |
425 [(set (match_dup 5) (match_dup 1))] | |
426 "operands[5] = gen_lowpart (DFmode, operands[4]);") | |
427 | |
428 (define_peephole2 | |
429 [(set (match_operand:DF 0 "memory_operand") | |
430 (match_operand:DF 1 "any_fp_register_operand")) | |
431 (set (mem:BLK (scratch:SI)) | |
432 (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) | |
433 (set (match_operand:DF 2 "sse_reg_operand") | |
434 (unspec:DF [(match_operand:DI 3 "memory_operand")] | |
435 UNSPEC_LDX_ATOMIC)) | |
436 (set (match_operand:DI 4 "memory_operand") | |
437 (unspec:DI [(match_dup 2)] | |
438 UNSPEC_STX_ATOMIC))] | |
439 "!TARGET_64BIT | |
440 && peep2_reg_dead_p (4, operands[2]) | |
441 && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" | |
442 [(const_int 0)] | |
443 { | |
444 emit_insn (gen_memory_blockage ()); | |
445 emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]); | |
446 DONE; | |
447 }) | |
448 | |
449 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC | |
450 ;; operations. But the fix_trunc patterns want way more setup than we want | |
451 ;; to provide. Note that the scratch is DFmode instead of XFmode in order | |
452 ;; to make it easy to allocate a scratch in either SSE or FP_REGs above. | |
453 | |
454 (define_insn "loaddi_via_fpu" | |
455 [(set (match_operand:DF 0 "register_operand" "=f") | |
456 (unspec:DF [(match_operand:DI 1 "memory_operand" "m")] | |
457 UNSPEC_FILD_ATOMIC))] | |
458 "TARGET_80387" | |
459 "fild%Z1\t%1" | |
460 [(set_attr "type" "fmov") | |
461 (set_attr "mode" "DF") | |
462 (set_attr "fp_int_src" "true")]) | |
463 | |
464 (define_insn "storedi_via_fpu" | |
465 [(set (match_operand:DI 0 "memory_operand" "=m") | |
466 (unspec:DI [(match_operand:DF 1 "register_operand" "f")] | |
467 UNSPEC_FIST_ATOMIC))] | |
468 "TARGET_80387" | |
469 { | |
470 gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX); | |
471 | |
472 return "fistp%Z0\t%0"; | |
473 } | |
474 [(set_attr "type" "fmov") | |
475 (set_attr "mode" "DI")]) | |
476 | |
477 (define_insn "loaddi_via_sse" | |
478 [(set (match_operand:DF 0 "register_operand" "=x") | |
479 (unspec:DF [(match_operand:DI 1 "memory_operand" "m")] | |
480 UNSPEC_LDX_ATOMIC))] | |
481 "TARGET_SSE" | |
482 { | |
483 if (TARGET_SSE2) | |
484 return "%vmovq\t{%1, %0|%0, %1}"; | |
485 return "movlps\t{%1, %0|%0, %1}"; | |
486 } | |
487 [(set_attr "type" "ssemov") | |
488 (set_attr "mode" "DI")]) | |
489 | |
490 (define_insn "storedi_via_sse" | |
491 [(set (match_operand:DI 0 "memory_operand" "=m") | |
492 (unspec:DI [(match_operand:DF 1 "register_operand" "x")] | |
493 UNSPEC_STX_ATOMIC))] | |
494 "TARGET_SSE" | |
495 { | |
496 if (TARGET_SSE2) | |
497 return "%vmovq\t{%1, %0|%0, %1}"; | |
498 return "movlps\t{%1, %0|%0, %1}"; | |
499 } | |
500 [(set_attr "type" "ssemov") | |
501 (set_attr "mode" "DI")]) | |
502 | |
503 (define_expand "atomic_compare_and_swap<mode>" | |
504 [(match_operand:QI 0 "register_operand") ;; bool success output | |
505 (match_operand:SWI124 1 "register_operand") ;; oldval output | |
506 (match_operand:SWI124 2 "memory_operand") ;; memory | |
507 (match_operand:SWI124 3 "register_operand") ;; expected input | |
508 (match_operand:SWI124 4 "register_operand") ;; newval input | |
509 (match_operand:SI 5 "const_int_operand") ;; is_weak | |
510 (match_operand:SI 6 "const_int_operand") ;; success model | |
511 (match_operand:SI 7 "const_int_operand")] ;; failure model | |
512 "TARGET_CMPXCHG" | |
513 { | |
514 emit_insn | |
515 (gen_atomic_compare_and_swap<mode>_1 | |
516 (operands[1], operands[2], operands[3], operands[4], operands[6])); | |
517 ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), | |
518 const0_rtx); | |
519 DONE; | |
520 }) | |
521 | |
522 (define_mode_iterator CASMODE | |
523 [(DI "TARGET_64BIT || TARGET_CMPXCHG8B") | |
524 (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) | |
525 (define_mode_attr CASHMODE [(DI "SI") (TI "DI")]) | |
526 | |
527 (define_expand "atomic_compare_and_swap<mode>" | |
528 [(match_operand:QI 0 "register_operand") ;; bool success output | |
529 (match_operand:CASMODE 1 "register_operand") ;; oldval output | |
530 (match_operand:CASMODE 2 "memory_operand") ;; memory | |
531 (match_operand:CASMODE 3 "register_operand") ;; expected input | |
532 (match_operand:CASMODE 4 "register_operand") ;; newval input | |
533 (match_operand:SI 5 "const_int_operand") ;; is_weak | |
534 (match_operand:SI 6 "const_int_operand") ;; success model | |
535 (match_operand:SI 7 "const_int_operand")] ;; failure model | |
536 "TARGET_CMPXCHG" | |
537 { | |
538 if (<MODE>mode == DImode && TARGET_64BIT) | |
539 { | |
540 emit_insn | |
541 (gen_atomic_compare_and_swapdi_1 | |
542 (operands[1], operands[2], operands[3], operands[4], operands[6])); | |
543 } | |
544 else | |
545 { | |
546 machine_mode hmode = <CASHMODE>mode; | |
547 | |
548 emit_insn | |
549 (gen_atomic_compare_and_swap<mode>_doubleword | |
550 (operands[1], operands[2], operands[3], | |
551 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), | |
552 operands[6])); | |
553 } | |
554 | |
555 ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), | |
556 const0_rtx); | |
557 DONE; | |
558 }) | |
559 | |
560 ;; For double-word compare and swap, we are obliged to play tricks with | |
561 ;; the input newval (op3:op4) because the Intel register numbering does | |
562 ;; not match the gcc register numbering, so the pair must be CX:BX. | |
563 | |
564 (define_mode_attr doublemodesuffix [(SI "8") (DI "16")]) | |
565 | |
566 (define_insn "atomic_compare_and_swap<dwi>_doubleword" | |
567 [(set (match_operand:<DWI> 0 "register_operand" "=A") | |
568 (unspec_volatile:<DWI> | |
569 [(match_operand:<DWI> 1 "memory_operand" "+m") | |
570 (match_operand:<DWI> 2 "register_operand" "0") | |
571 (match_operand:DWIH 3 "register_operand" "b") | |
572 (match_operand:DWIH 4 "register_operand" "c") | |
573 (match_operand:SI 5 "const_int_operand")] | |
0 | 574 UNSPECV_CMPXCHG)) |
575 (set (match_dup 1) | |
111 | 576 (unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG)) |
55 | 577 (set (reg:CCZ FLAGS_REG) |
111 | 578 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))] |
579 "TARGET_CMPXCHG<doublemodesuffix>B" | |
580 "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1") | |
0 | 581 |
111 | 582 (define_insn "atomic_compare_and_swap<mode>_1" |
583 [(set (match_operand:SWI 0 "register_operand" "=a") | |
584 (unspec_volatile:SWI | |
585 [(match_operand:SWI 1 "memory_operand" "+m") | |
586 (match_operand:SWI 2 "register_operand" "0") | |
587 (match_operand:SWI 3 "register_operand" "<r>") | |
588 (match_operand:SI 4 "const_int_operand")] | |
589 UNSPECV_CMPXCHG)) | |
0 | 590 (set (match_dup 1) |
111 | 591 (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG)) |
0 | 592 (set (reg:CCZ FLAGS_REG) |
111 | 593 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))] |
594 "TARGET_CMPXCHG" | |
595 "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}") | |
0 | 596 |
111 | 597 ;; For operand 2 nonmemory_operand predicate is used instead of |
598 ;; register_operand to allow combiner to better optimize atomic | |
599 ;; additions of constants. | |
600 (define_insn "atomic_fetch_add<mode>" | |
55 | 601 [(set (match_operand:SWI 0 "register_operand" "=<r>") |
55 | 602 (unspec_volatile:SWI |
111 | 603 [(match_operand:SWI 1 "memory_operand" "+m") |
604 (match_operand:SI 3 "const_int_operand")] ;; model | |
605 UNSPECV_XCHG)) | |
0 | 606 (set (match_dup 1) |
55 | 607 (plus:SWI (match_dup 1) |
111 | 608 (match_operand:SWI 2 "nonmemory_operand" "0"))) |
0 | 609 (clobber (reg:CC FLAGS_REG))] |
610 "TARGET_XADD" | |
111 | 611 "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}") |
612 | |
613 ;; This peephole2 and following insn optimize | |
614 ;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec} | |
615 ;; followed by testing of flags instead of lock xadd and comparisons. | |
616 (define_peephole2 | |
617 [(set (match_operand:SWI 0 "register_operand") | |
618 (match_operand:SWI 2 "const_int_operand")) | |
619 (parallel [(set (match_dup 0) | |
620 (unspec_volatile:SWI | |
621 [(match_operand:SWI 1 "memory_operand") | |
622 (match_operand:SI 4 "const_int_operand")] | |
623 UNSPECV_XCHG)) | |
624 (set (match_dup 1) | |
625 (plus:SWI (match_dup 1) | |
626 (match_dup 0))) | |
627 (clobber (reg:CC FLAGS_REG))]) | |
628 (set (reg:CCZ FLAGS_REG) | |
629 (compare:CCZ (match_dup 0) | |
630 (match_operand:SWI 3 "const_int_operand")))] | |
631 "peep2_reg_dead_p (3, operands[0]) | |
632 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) | |
633 == -(unsigned HOST_WIDE_INT) INTVAL (operands[3]) | |
634 && !reg_overlap_mentioned_p (operands[0], operands[1])" | |
635 [(parallel [(set (reg:CCZ FLAGS_REG) | |
636 (compare:CCZ | |
637 (unspec_volatile:SWI [(match_dup 1) (match_dup 4)] | |
638 UNSPECV_XCHG) | |
639 (match_dup 3))) | |
640 (set (match_dup 1) | |
641 (plus:SWI (match_dup 1) | |
642 (match_dup 2)))])]) | |
643 | |
644 ;; Likewise, but for the -Os special case of *mov<mode>_or. | |
645 (define_peephole2 | |
646 [(parallel [(set (match_operand:SWI 0 "register_operand") | |
647 (match_operand:SWI 2 "constm1_operand")) | |
648 (clobber (reg:CC FLAGS_REG))]) | |
649 (parallel [(set (match_dup 0) | |
650 (unspec_volatile:SWI | |
651 [(match_operand:SWI 1 "memory_operand") | |
652 (match_operand:SI 4 "const_int_operand")] | |
653 UNSPECV_XCHG)) | |
654 (set (match_dup 1) | |
655 (plus:SWI (match_dup 1) | |
656 (match_dup 0))) | |
657 (clobber (reg:CC FLAGS_REG))]) | |
658 (set (reg:CCZ FLAGS_REG) | |
659 (compare:CCZ (match_dup 0) | |
660 (match_operand:SWI 3 "const_int_operand")))] | |
661 "peep2_reg_dead_p (3, operands[0]) | |
662 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) | |
663 == -(unsigned HOST_WIDE_INT) INTVAL (operands[3]) | |
664 && !reg_overlap_mentioned_p (operands[0], operands[1])" | |
665 [(parallel [(set (reg:CCZ FLAGS_REG) | |
666 (compare:CCZ | |
667 (unspec_volatile:SWI [(match_dup 1) (match_dup 4)] | |
668 UNSPECV_XCHG) | |
669 (match_dup 3))) | |
670 (set (match_dup 1) | |
671 (plus:SWI (match_dup 1) | |
672 (match_dup 2)))])]) | |
673 | |
674 (define_insn "*atomic_fetch_add_cmp<mode>" | |
675 [(set (reg:CCZ FLAGS_REG) | |
676 (compare:CCZ | |
677 (unspec_volatile:SWI | |
678 [(match_operand:SWI 0 "memory_operand" "+m") | |
679 (match_operand:SI 3 "const_int_operand")] ;; model | |
680 UNSPECV_XCHG) | |
681 (match_operand:SWI 2 "const_int_operand" "i"))) | |
682 (set (match_dup 0) | |
683 (plus:SWI (match_dup 0) | |
684 (match_operand:SWI 1 "const_int_operand" "i")))] | |
685 "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) | |
686 == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])" | |
687 { | |
688 if (incdec_operand (operands[1], <MODE>mode)) | |
689 { | |
690 if (operands[1] == const1_rtx) | |
691 return "lock{%;} %K3inc{<imodesuffix>}\t%0"; | |
692 else | |
693 { | |
694 gcc_assert (operands[1] == constm1_rtx); | |
695 return "lock{%;} %K3dec{<imodesuffix>}\t%0"; | |
696 } | |
697 } | |
698 | |
699 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode)) | |
700 return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}"; | |
701 | |
702 return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}"; | |
703 }) | |
0 | 704 |
705 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space. | |
111 | 706 ;; In addition, it is always a full barrier, so we can ignore the memory model. |
707 (define_insn "atomic_exchange<mode>" | |
708 [(set (match_operand:SWI 0 "register_operand" "=<r>") ;; output | |
55 | 709 (unspec_volatile:SWI |
111 | 710 [(match_operand:SWI 1 "memory_operand" "+m") ;; memory |
711 (match_operand:SI 3 "const_int_operand")] ;; model | |
712 UNSPECV_XCHG)) | |
0 | 713 (set (match_dup 1) |
111 | 714 (match_operand:SWI 2 "register_operand" "0"))] ;; input |
0 | 715 "" |
111 | 716 "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}") |
0 | 717 |
111 | 718 (define_insn "atomic_add<mode>" |
55 | 719 [(set (match_operand:SWI 0 "memory_operand" "+m") |
55 | 720 (unspec_volatile:SWI |
55 | 721 [(plus:SWI (match_dup 0) |
111 | 722 (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) |
723 (match_operand:SI 2 "const_int_operand")] ;; model | |
0 | 724 UNSPECV_LOCK)) |
725 (clobber (reg:CC FLAGS_REG))] | |
726 "" | |
727 { | |
111 | 728 if (incdec_operand (operands[1], <MODE>mode)) |
0 | 729 { |
730 if (operands[1] == const1_rtx) | |
111 | 731 return "lock{%;} %K2inc{<imodesuffix>}\t%0"; |
732 else | |
733 { | |
734 gcc_assert (operands[1] == constm1_rtx); | |
735 return "lock{%;} %K2dec{<imodesuffix>}\t%0"; | |
736 } | |
0 | 737 } |
738 | |
63 | 739 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode)) |
111 | 740 return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}"; |
63 | 741 |
111 | 742 return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}"; |
0 | 743 }) |
744 | |
111 | 745 (define_insn "atomic_sub<mode>" |
55 | 746 [(set (match_operand:SWI 0 "memory_operand" "+m") |
55 | 747 (unspec_volatile:SWI |
55 | 748 [(minus:SWI (match_dup 0) |
111 | 749 (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) |
750 (match_operand:SI 2 "const_int_operand")] ;; model | |
0 | 751 UNSPECV_LOCK)) |
752 (clobber (reg:CC FLAGS_REG))] | |
753 "" | |
754 { | |
111 | 755 if (incdec_operand (operands[1], <MODE>mode)) |
0 | 756 { |
757 if (operands[1] == const1_rtx) | |
111 | 758 return "lock{%;} %K2dec{<imodesuffix>}\t%0"; |
759 else | |
760 { | |
761 gcc_assert (operands[1] == constm1_rtx); | |
762 return "lock{%;} %K2inc{<imodesuffix>}\t%0"; | |
763 } | |
0 | 764 } |
765 | |
111 | 766 if (x86_maybe_negate_const_int (&operands[1], <MODE>mode)) |
767 return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}"; | |
768 | |
769 return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}"; | |
0 | 770 }) |
771 | |
111 | 772 (define_insn "atomic_<logic><mode>" |
55 | 773 [(set (match_operand:SWI 0 "memory_operand" "+m") |
55 | 774 (unspec_volatile:SWI |
55 | 775 [(any_logic:SWI (match_dup 0) |
111 | 776 (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) |
777 (match_operand:SI 2 "const_int_operand")] ;; model | |
0 | 778 UNSPECV_LOCK)) |
779 (clobber (reg:CC FLAGS_REG))] | |
780 "" | |
111 | 781 "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}") |
782 | |
783 (define_expand "atomic_bit_test_and_set<mode>" | |
784 [(match_operand:SWI248 0 "register_operand") | |
785 (match_operand:SWI248 1 "memory_operand") | |
786 (match_operand:SWI248 2 "nonmemory_operand") | |
787 (match_operand:SI 3 "const_int_operand") ;; model | |
788 (match_operand:SI 4 "const_int_operand")] | |
789 "" | |
790 { | |
791 emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2], | |
792 operands[3])); | |
793 rtx tem = gen_reg_rtx (QImode); | |
794 ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); | |
795 rtx result = convert_modes (<MODE>mode, QImode, tem, 1); | |
796 if (operands[4] == const0_rtx) | |
797 result = expand_simple_binop (<MODE>mode, ASHIFT, result, | |
798 operands[2], operands[0], 0, OPTAB_DIRECT); | |
799 if (result != operands[0]) | |
800 emit_move_insn (operands[0], result); | |
801 DONE; | |
802 }) | |
803 | |
804 (define_insn "atomic_bit_test_and_set<mode>_1" | |
805 [(set (reg:CCC FLAGS_REG) | |
806 (compare:CCC | |
807 (unspec_volatile:SWI248 | |
808 [(match_operand:SWI248 0 "memory_operand" "+m") | |
809 (match_operand:SI 2 "const_int_operand")] ;; model | |
810 UNSPECV_XCHG) | |
811 (const_int 0))) | |
812 (set (zero_extract:SWI248 (match_dup 0) | |
813 (const_int 1) | |
814 (match_operand:SWI248 1 "nonmemory_operand" "rN")) | |
815 (const_int 1))] | |
816 "" | |
817 "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}") | |
818 | |
819 (define_expand "atomic_bit_test_and_complement<mode>" | |
820 [(match_operand:SWI248 0 "register_operand") | |
821 (match_operand:SWI248 1 "memory_operand") | |
822 (match_operand:SWI248 2 "nonmemory_operand") | |
823 (match_operand:SI 3 "const_int_operand") ;; model | |
824 (match_operand:SI 4 "const_int_operand")] | |
825 "" | |
826 { | |
827 emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1], | |
828 operands[2], | |
829 operands[3])); | |
830 rtx tem = gen_reg_rtx (QImode); | |
831 ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); | |
832 rtx result = convert_modes (<MODE>mode, QImode, tem, 1); | |
833 if (operands[4] == const0_rtx) | |
834 result = expand_simple_binop (<MODE>mode, ASHIFT, result, | |
835 operands[2], operands[0], 0, OPTAB_DIRECT); | |
836 if (result != operands[0]) | |
837 emit_move_insn (operands[0], result); | |
838 DONE; | |
839 }) | |
840 | |
841 (define_insn "atomic_bit_test_and_complement<mode>_1" | |
842 [(set (reg:CCC FLAGS_REG) | |
843 (compare:CCC | |
844 (unspec_volatile:SWI248 | |
845 [(match_operand:SWI248 0 "memory_operand" "+m") | |
846 (match_operand:SI 2 "const_int_operand")] ;; model | |
847 UNSPECV_XCHG) | |
848 (const_int 0))) | |
849 (set (zero_extract:SWI248 (match_dup 0) | |
850 (const_int 1) | |
851 (match_operand:SWI248 1 "nonmemory_operand" "rN")) | |
852 (not:SWI248 (zero_extract:SWI248 (match_dup 0) | |
853 (const_int 1) | |
854 (match_dup 1))))] | |
855 "" | |
856 "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}") | |
857 | |
858 (define_expand "atomic_bit_test_and_reset<mode>" | |
859 [(match_operand:SWI248 0 "register_operand") | |
860 (match_operand:SWI248 1 "memory_operand") | |
861 (match_operand:SWI248 2 "nonmemory_operand") | |
862 (match_operand:SI 3 "const_int_operand") ;; model | |
863 (match_operand:SI 4 "const_int_operand")] | |
864 "" | |
865 { | |
866 emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2], | |
867 operands[3])); | |
868 rtx tem = gen_reg_rtx (QImode); | |
869 ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); | |
870 rtx result = convert_modes (<MODE>mode, QImode, tem, 1); | |
871 if (operands[4] == const0_rtx) | |
872 result = expand_simple_binop (<MODE>mode, ASHIFT, result, | |
873 operands[2], operands[0], 0, OPTAB_DIRECT); | |
874 if (result != operands[0]) | |
875 emit_move_insn (operands[0], result); | |
876 DONE; | |
877 }) | |
878 | |
879 (define_insn "atomic_bit_test_and_reset<mode>_1" | |
880 [(set (reg:CCC FLAGS_REG) | |
881 (compare:CCC | |
882 (unspec_volatile:SWI248 | |
883 [(match_operand:SWI248 0 "memory_operand" "+m") | |
884 (match_operand:SI 2 "const_int_operand")] ;; model | |
885 UNSPECV_XCHG) | |
886 (const_int 0))) | |
887 (set (zero_extract:SWI248 (match_dup 0) | |
888 (const_int 1) | |
889 (match_operand:SWI248 1 "nonmemory_operand" "rN")) | |
890 (const_int 0))] | |
891 "" | |
892 "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}") |