comparison gcc/config/nds32/nds32-memory-manipulation.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple | 1 /* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple |
2 and store_multiple pattern of Andes NDS32 cpu for GNU compiler | 2 and store_multiple pattern of Andes NDS32 cpu for GNU compiler |
3 Copyright (C) 2012-2017 Free Software Foundation, Inc. | 3 Copyright (C) 2012-2018 Free Software Foundation, Inc. |
4 Contributed by Andes Technology Corporation. | 4 Contributed by Andes Technology Corporation. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
8 GCC is free software; you can redistribute it and/or modify it | 8 GCC is free software; you can redistribute it and/or modify it |
18 You should have received a copy of the GNU General Public License | 18 You should have received a copy of the GNU General Public License |
19 along with GCC; see the file COPYING3. If not see | 19 along with GCC; see the file COPYING3. If not see |
20 <http://www.gnu.org/licenses/>. */ | 20 <http://www.gnu.org/licenses/>. */ |
21 | 21 |
22 /* ------------------------------------------------------------------------ */ | 22 /* ------------------------------------------------------------------------ */ |
23 | |
24 #define IN_TARGET_CODE 1 | |
23 | 25 |
24 #include "config.h" | 26 #include "config.h" |
25 #include "system.h" | 27 #include "system.h" |
26 #include "coretypes.h" | 28 #include "coretypes.h" |
27 #include "backend.h" | 29 #include "backend.h" |
28 #include "target.h" | 30 #include "target.h" |
29 #include "rtl.h" | 31 #include "rtl.h" |
30 #include "memmodel.h" | 32 #include "memmodel.h" |
31 #include "emit-rtl.h" | 33 #include "emit-rtl.h" |
32 #include "explow.h" | 34 #include "explow.h" |
35 #include "tree.h" | |
36 #include "expr.h" | |
37 #include "optabs.h" | |
38 #include "nds32-protos.h" | |
33 | 39 |
34 /* ------------------------------------------------------------------------ */ | 40 /* ------------------------------------------------------------------------ */ |
35 | 41 |
36 /* Functions to expand load_multiple and store_multiple. | 42 /* Auxiliary static function definitions. */ |
37 They are auxiliary extern functions to help create rtx template. | 43 |
38 Check nds32-multiple.md file for the patterns. */ | 44 static void |
39 rtx | 45 nds32_emit_load_store (rtx reg, rtx mem, |
40 nds32_expand_load_multiple (int base_regno, int count, | 46 enum machine_mode mode, |
41 rtx base_addr, rtx basemem) | 47 int offset, bool load_p) |
42 { | 48 { |
43 int par_index; | 49 rtx new_mem; |
44 int offset; | 50 new_mem = adjust_address (mem, mode, offset); |
45 rtx result; | 51 if (load_p) |
46 rtx new_addr, mem, reg; | 52 emit_move_insn (reg, new_mem); |
47 | 53 else |
48 /* Create the pattern that is presented in nds32-multiple.md. */ | 54 emit_move_insn (new_mem, reg); |
49 | 55 } |
50 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); | 56 |
51 | 57 static void |
52 for (par_index = 0; par_index < count; par_index++) | 58 nds32_emit_post_inc_load_store (rtx reg, rtx base_reg, |
53 { | 59 enum machine_mode mode, |
54 offset = par_index * 4; | 60 bool load_p) |
55 /* 4-byte for loading data to each register. */ | 61 { |
56 new_addr = plus_constant (Pmode, base_addr, offset); | 62 gcc_assert (GET_MODE (reg) == mode); |
57 mem = adjust_automodify_address_nv (basemem, SImode, | 63 gcc_assert (GET_MODE (base_reg) == Pmode); |
58 new_addr, offset); | 64 |
59 reg = gen_rtx_REG (SImode, base_regno + par_index); | 65 /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it may |
60 | 66 not recognize by gcc, so let gcc combine it at auto_inc_dec pass. */ |
61 XVECEXP (result, 0, par_index) = gen_rtx_SET (reg, mem); | 67 if (load_p) |
62 } | 68 emit_move_insn (reg, |
63 | 69 gen_rtx_MEM (mode, |
64 return result; | 70 base_reg)); |
65 } | 71 else |
66 | 72 emit_move_insn (gen_rtx_MEM (mode, |
67 rtx | 73 base_reg), |
68 nds32_expand_store_multiple (int base_regno, int count, | 74 reg); |
69 rtx base_addr, rtx basemem) | 75 |
70 { | 76 emit_move_insn (base_reg, |
71 int par_index; | 77 plus_constant(Pmode, base_reg, GET_MODE_SIZE (mode))); |
72 int offset; | 78 } |
73 rtx result; | 79 |
74 rtx new_addr, mem, reg; | 80 static void |
75 | 81 nds32_emit_mem_move (rtx src, rtx dst, |
76 /* Create the pattern that is presented in nds32-multiple.md. */ | 82 enum machine_mode mode, |
77 | 83 int addr_offset) |
78 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); | 84 { |
79 | 85 gcc_assert (MEM_P (src) && MEM_P (dst)); |
80 for (par_index = 0; par_index < count; par_index++) | 86 rtx tmp_reg = gen_reg_rtx (mode); |
81 { | 87 nds32_emit_load_store (tmp_reg, src, mode, |
82 offset = par_index * 4; | 88 addr_offset, /* load_p */ true); |
83 /* 4-byte for storing data to memory. */ | 89 nds32_emit_load_store (tmp_reg, dst, mode, |
84 new_addr = plus_constant (Pmode, base_addr, offset); | 90 addr_offset, /* load_p */ false); |
85 mem = adjust_automodify_address_nv (basemem, SImode, | 91 } |
86 new_addr, offset); | 92 |
87 reg = gen_rtx_REG (SImode, base_regno + par_index); | 93 static void |
88 | 94 nds32_emit_mem_move_block (int base_regno, int count, |
89 XVECEXP (result, 0, par_index) = gen_rtx_SET (mem, reg); | 95 rtx *dst_base_reg, rtx *dst_mem, |
90 } | 96 rtx *src_base_reg, rtx *src_mem, |
91 | 97 bool update_base_reg_p) |
92 return result; | 98 { |
93 } | 99 rtx new_base_reg; |
94 | 100 |
95 /* Function to move block memory content by | 101 emit_insn (nds32_expand_load_multiple (base_regno, count, |
96 using load_multiple and store_multiple. | 102 *src_base_reg, *src_mem, |
97 This is auxiliary extern function to help create rtx template. | 103 update_base_reg_p, &new_base_reg)); |
98 Check nds32-multiple.md file for the patterns. */ | 104 if (update_base_reg_p) |
99 int | 105 { |
100 nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) | 106 *src_base_reg = new_base_reg; |
101 { | 107 *src_mem = gen_rtx_MEM (SImode, *src_base_reg); |
102 HOST_WIDE_INT in_words, out_words; | 108 } |
109 | |
110 emit_insn (nds32_expand_store_multiple (base_regno, count, | |
111 *dst_base_reg, *dst_mem, | |
112 update_base_reg_p, &new_base_reg)); | |
113 | |
114 if (update_base_reg_p) | |
115 { | |
116 *dst_base_reg = new_base_reg; | |
117 *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg); | |
118 } | |
119 } | |
120 | |
121 /* ------------------------------------------------------------------------ */ | |
122 | |
123 /* Auxiliary function for expand movmem pattern. */ | |
124 | |
125 static bool | |
126 nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, | |
127 rtx size, | |
128 rtx alignment) | |
129 { | |
130 /* Emit loop version of movmem. | |
131 | |
132 andi $size_least_3_bit, $size, #~7 | |
133 add $dst_end, $dst, $size | |
134 move $dst_itr, $dst | |
135 move $src_itr, $src | |
136 beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. | |
137 add $double_word_end, $dst, $size_least_3_bit | |
138 | |
139 .Ldouble_word_mode_loop: | |
140 lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr | |
141 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr | |
142 ! move will be deleted after register allocation |
143 move $src_itr, $src_itr' | |
144 move $dst_itr, $dst_itr' | |
145 ! Not reached upper bound. Loop. |
146 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop | |
147 | |
148 .Lbyte_mode_entry: | |
149 beq $dst_itr, $dst_end, .Lend_label | |
150 .Lbyte_mode_loop: | |
151 lbi.bi $tmp, [$src_itr], #1 | |
152 sbi.bi $tmp, [$dst_itr], #1 | |
153 ! Not reached upper bound. Loop. |
154 bne $dst_itr, $dst_end, .Lbyte_mode_loop | |
155 .Lend_label: | |
156 */ | |
103 rtx dst_base_reg, src_base_reg; | 157 rtx dst_base_reg, src_base_reg; |
158 rtx dst_itr, src_itr; | |
159 rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; | |
160 rtx dst_end; | |
161 rtx size_least_3_bit; | |
162 rtx double_word_end; | |
163 rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label; | |
164 rtx tmp; | |
165 rtx mask_least_3_bit; | |
166 int start_regno; | |
167 bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; | |
168 | |
169 if (TARGET_ISA_V3M && !align_to_4_bytes) | |
170 return 0; | |
171 | |
172 if (TARGET_REDUCED_REGS) | |
173 start_regno = 2; | |
174 else | |
175 start_regno = 16; | |
176 | |
177 dst_itr = gen_reg_rtx (Pmode); | |
178 src_itr = gen_reg_rtx (Pmode); | |
179 dst_end = gen_reg_rtx (Pmode); | |
180 tmp = gen_reg_rtx (QImode); | |
181 mask_least_3_bit = GEN_INT (~7); | |
182 | |
183 double_word_mode_loop = gen_label_rtx (); | |
184 byte_mode_entry = gen_label_rtx (); | |
185 byte_mode_loop = gen_label_rtx (); | |
186 end_label = gen_label_rtx (); | |
187 | |
188 dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); | |
189 src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); | |
190 /* andi $size_least_3_bit, $size, #~7 */ | |
191 size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit, | |
192 NULL_RTX, 0, OPTAB_WIDEN); | |
193 /* add $dst_end, $dst, $size */ | |
194 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, | |
195 NULL_RTX, 0, OPTAB_WIDEN); | |
196 | |
197 /* move $dst_itr, $dst | |
198 move $src_itr, $src */ | |
199 emit_move_insn (dst_itr, dst_base_reg); | |
200 emit_move_insn (src_itr, src_base_reg); | |
201 | |
202 /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */ | |
203 emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL, | |
204 SImode, 1, byte_mode_entry); | |
205 /* add $double_word_end, $dst, $size_least_3_bit */ | |
206 double_word_end = expand_binop (Pmode, add_optab, | |
207 dst_base_reg, size_least_3_bit, | |
208 NULL_RTX, 0, OPTAB_WIDEN); | |
209 | |
210 /* .Ldouble_word_mode_loop: */ | |
211 emit_label (double_word_mode_loop); | |
212 /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr | |
213 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ | |
214 src_itr_m = src_itr; | |
215 dst_itr_m = dst_itr; | |
216 srcmem_m = srcmem; | |
217 dstmem_m = dstmem; | |
218 nds32_emit_mem_move_block (start_regno, 2, | |
219 &dst_itr_m, &dstmem_m, | |
220 &src_itr_m, &srcmem_m, | |
221 true); | |
222 /* move $src_itr, $src_itr' | |
223 move $dst_itr, $dst_itr' */ | |
224 emit_move_insn (dst_itr, dst_itr_m); | |
225 emit_move_insn (src_itr, src_itr_m); | |
226 | |
227 /* ! Not reached upper bound. Loop. |
228 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ | |
229 emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL, | |
230 Pmode, 1, double_word_mode_loop); | |
231 /* .Lbyte_mode_entry: */ | |
232 emit_label (byte_mode_entry); | |
233 | |
234 /* beq $dst_itr, $dst_end, .Lend_label */ | |
235 emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL, | |
236 Pmode, 1, end_label); | |
237 /* .Lbyte_mode_loop: */ | |
238 emit_label (byte_mode_loop); | |
239 | |
240 /* lbi.bi $tmp, [$src_itr], #1 */ | |
241 nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); | |
242 | |
243 /* sbi.bi $tmp, [$dst_itr], #1 */ | |
244 nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); | |
245 /* ! Not reached upper bound. Loop. |
246 bne $dst_itr, $dst_end, .Lbyte_mode_loop */ | |
247 emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, | |
248 SImode, 1, byte_mode_loop); | |
249 | |
250 /* .Lend_label: */ | |
251 emit_label (end_label); | |
252 | |
253 return true; | |
254 } | |
255 | |
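For readers following the RTL above, here is a standalone C sketch of the control flow that nds32_expand_movmemsi_loop_unknown_size emits (illustrative only, not part of this file; the real main-loop body is an lmw.bim/smw.bim pair emitted through nds32_emit_mem_move_block, not a byte-wise inner loop):

```c
#include <stddef.h>

/* Illustrative C model only, not part of the GCC source: the loop shape
   emitted above -- an 8-byte main loop over (size & ~7) bytes followed
   by a byte-copy tail.  */
void
model_movmem_loop (unsigned char *dst, const unsigned char *src, size_t size)
{
  size_t double_word_bytes = size & ~(size_t) 7;  /* andi $size_least_3_bit, $size, #~7 */
  unsigned char *dst_end = dst + size;            /* add $dst_end, $dst, $size */
  unsigned char *dst_itr = dst;                   /* move $dst_itr, $dst */
  const unsigned char *src_itr = src;             /* move $src_itr, $src */
  unsigned char *double_word_end = dst + double_word_bytes;

  while (dst_itr != double_word_end)              /* .Ldouble_word_mode_loop */
    for (int i = 0; i < 8; i++)                   /* one lmw.bim/smw.bim pair moves 8 bytes */
      *dst_itr++ = *src_itr++;

  while (dst_itr != dst_end)                      /* .Lbyte_mode_loop */
    *dst_itr++ = *src_itr++;                      /* lbi.bi / sbi.bi */
}
```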
256 static bool | |
257 nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, | |
258 rtx size, rtx alignment) | |
259 { | |
260 rtx dst_base_reg, src_base_reg; | |
261 rtx dst_itr, src_itr; | |
262 rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; | |
263 rtx dst_end; | |
264 rtx double_word_mode_loop, byte_mode_loop; | |
265 rtx tmp; | |
266 int start_regno; | |
267 bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; | |
268 unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); | |
269 | |
270 if (TARGET_ISA_V3M && !align_to_4_bytes) | |
271 return 0; | |
272 | |
273 if (TARGET_REDUCED_REGS) | |
274 start_regno = 2; | |
275 else | |
276 start_regno = 16; | |
277 | |
278 dst_itr = gen_reg_rtx (Pmode); | |
279 src_itr = gen_reg_rtx (Pmode); | |
280 dst_end = gen_reg_rtx (Pmode); | |
281 tmp = gen_reg_rtx (QImode); | |
282 | |
283 double_word_mode_loop = gen_label_rtx (); | |
284 byte_mode_loop = gen_label_rtx (); | |
285 | |
286 dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); | |
287 src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); | |
288 | |
289 if (total_bytes < 8) | |
290 { | |
291 /* Emit total_bytes less than 8 loop version of movmem. | |
292 add $dst_end, $dst, $size | |
293 move $dst_itr, $dst | |
294 .Lbyte_mode_loop: | |
295 lbi.bi $tmp, [$src_itr], #1 | |
296 sbi.bi $tmp, [$dst_itr], #1 | |
297 ! Not reached upper bound. Loop. |
298 bne $dst_itr, $dst_end, .Lbyte_mode_loop */ | |
299 | |
300 /* add $dst_end, $dst, $size */ | |
301 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, | |
302 NULL_RTX, 0, OPTAB_WIDEN); | |
303 /* move $dst_itr, $dst | |
304 move $src_itr, $src */ | |
305 emit_move_insn (dst_itr, dst_base_reg); | |
306 emit_move_insn (src_itr, src_base_reg); | |
307 | |
308 /* .Lbyte_mode_loop: */ | |
309 emit_label (byte_mode_loop); | |
310 | |
311 /* lbi.bi $tmp, [$src_itr], #1 */ | |
312 nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); | |
313 | |
314 /* sbi.bi $tmp, [$dst_itr], #1 */ | |
315 nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); | |
316 /* ! Not reached upper bound. Loop. |
317 bne $dst_itr, $dst_end, .Lbyte_mode_loop */ | |
318 emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, | |
319 SImode, 1, byte_mode_loop); | |
320 return true; | |
321 } | |
322 else if (total_bytes % 8 == 0) | |
323 { | |
324 /* Emit multiple of 8 loop version of movmem. | |
325 | |
326 add $dst_end, $dst, $size | |
327 move $dst_itr, $dst | |
328 move $src_itr, $src | |
329 | |
330 .Ldouble_word_mode_loop: | |
331 lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr | |
332 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr | |
333 ! move will be deleted after register allocation |
334 move $src_itr, $src_itr' | |
335 move $dst_itr, $dst_itr' | |
336 ! Not reached upper bound. Loop. |
337 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ | |
338 | |
339 /* add $dst_end, $dst, $size */ | |
340 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, | |
341 NULL_RTX, 0, OPTAB_WIDEN); | |
342 | |
343 /* move $dst_itr, $dst | |
344 move $src_itr, $src */ | |
345 emit_move_insn (dst_itr, dst_base_reg); | |
346 emit_move_insn (src_itr, src_base_reg); | |
347 | |
348 /* .Ldouble_word_mode_loop: */ | |
349 emit_label (double_word_mode_loop); | |
350 /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr | |
351 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ | |
352 src_itr_m = src_itr; | |
353 dst_itr_m = dst_itr; | |
354 srcmem_m = srcmem; | |
355 dstmem_m = dstmem; | |
356 nds32_emit_mem_move_block (start_regno, 2, | |
357 &dst_itr_m, &dstmem_m, | |
358 &src_itr_m, &srcmem_m, | |
359 true); | |
360 /* move $src_itr, $src_itr' | |
361 move $dst_itr, $dst_itr' */ | |
362 emit_move_insn (dst_itr, dst_itr_m); | |
363 emit_move_insn (src_itr, src_itr_m); | |
364 | |
365 /* ! Not reached upper bound. Loop. |
366 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ | |
367 emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, | |
368 Pmode, 1, double_word_mode_loop); | |
369 } | |
370 else | |
371 { | |
372 /* Handle size greater than 8, and not a multiple of 8. */ | |
373 return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, | |
374 size, alignment); | |
375 } | |
376 | |
377 return true; | |
378 } | |
379 | |
380 static bool | |
381 nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem, | |
382 rtx size, rtx alignment) | |
383 { | |
384 if (CONST_INT_P (size)) | |
385 return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem, | |
386 size, alignment); | |
387 else | |
388 return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, | |
389 size, alignment); | |
390 } | |
391 | |
392 static bool | |
393 nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, | |
394 rtx total_bytes, rtx alignment) | |
395 { | |
396 rtx dst_base_reg, src_base_reg; | |
397 rtx tmp_reg; | |
104 int maximum_bytes; | 398 int maximum_bytes; |
399 int maximum_bytes_per_inst; | |
400 int maximum_regs; | |
401 int start_regno; | |
402 int i, inst_num; | |
403 HOST_WIDE_INT remain_bytes, remain_words; | |
404 bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; | |
405 bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0; | |
105 | 406 |
106 /* Because the reduced register set has few registers | 407 /* Because the reduced register set has few registers |
107 (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' | 408 (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' |
108 cannot be used for register allocation), | 409 cannot be used for register allocation), |
109 using 8 registers (32 bytes) for moving memory block | 410 using 8 registers (32 bytes) for moving memory block |
110 may easily consume all of them. | 411 may easily consume all of them. |
111 It makes register allocation/spilling hard to work. | 412 It makes register allocation/spilling hard to work. |
112 So we only allow maximum=4 registers (16 bytes) for | 413 So we only allow maximum=4 registers (16 bytes) for |
113 moving memory block under reduced-set registers. */ | 414 moving memory block under reduced-set registers. */ |
114 if (TARGET_REDUCED_REGS) | 415 if (TARGET_REDUCED_REGS) |
115 maximum_bytes = 16; | 416 { |
417 maximum_regs = 4; | |
418 maximum_bytes = 64; | |
419 start_regno = 2; | |
420 } | |
116 else | 421 else |
117 maximum_bytes = 32; | 422 { |
423 /* $r25 is $tp so we use up to 8 registers. */ | |
424 maximum_regs = 8; | |
425 maximum_bytes = 160; | |
426 start_regno = 16; | |
427 } | |
428 maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD; | |
118 | 429 |
119 /* 1. Total_bytes is integer for sure. | 430 /* 1. Total_bytes is integer for sure. |
120 2. Alignment is integer for sure. | 431 2. Alignment is integer for sure. |
121 3. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes. | 432 3. Maximum 4 or 8 registers per instruction and up to 4 or 5 instructions, |
122 4. Requires (n * 4) block size. | 433 4 * 4 * 4 = 64 bytes, 8 * 4 * 5 = 160 bytes. |
123 5. Requires 4-byte alignment. */ | 434 4. The dstmem cannot be volatile memory access. |
435 5. The srcmem cannot be volatile memory access. | |
436 6. The shared alignment must be 4-byte aligned on v3m since lmw/smw do *NOT* |
437 support unaligned access in the v3m configuration. |
124 if (GET_CODE (total_bytes) != CONST_INT | 438 if (GET_CODE (total_bytes) != CONST_INT |
125 || GET_CODE (alignment) != CONST_INT | 439 || GET_CODE (alignment) != CONST_INT |
126 || INTVAL (total_bytes) > maximum_bytes | 440 || INTVAL (total_bytes) > maximum_bytes |
127 || INTVAL (total_bytes) & 3 | 441 || MEM_VOLATILE_P (dstmem) |
128 || INTVAL (alignment) & 3) | 442 || MEM_VOLATILE_P (srcmem) |
129 return 0; | 443 || (TARGET_ISA_V3M && !align_to_4_bytes)) |
444 return false; | |
130 | 445 |
131 dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); | 446 dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); |
132 src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); | 447 src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); |
133 | 448 remain_bytes = INTVAL (total_bytes); |
134 out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD; | 449 |
135 | 450 /* Do not update base address for last lmw/smw pair. */ |
136 emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem)); | 451 inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1)) |
137 emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem)); | 452 / maximum_bytes_per_inst) - 1; |
138 | 453 |
139 /* Successfully create patterns, return 1. */ | 454 for (i = 0; i < inst_num; i++) |
140 return 1; | 455 { |
456 nds32_emit_mem_move_block (start_regno, maximum_regs, | |
457 &dst_base_reg, &dstmem, | |
458 &src_base_reg, &srcmem, | |
459 true); | |
460 } | |
461 remain_bytes -= maximum_bytes_per_inst * inst_num; | |
462 | |
463 remain_words = remain_bytes / UNITS_PER_WORD; | |
464 remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); | |
465 | |
466 if (remain_words != 0) | |
467 { | |
468 if (remain_bytes != 0) | |
469 nds32_emit_mem_move_block (start_regno, remain_words, | |
470 &dst_base_reg, &dstmem, | |
471 &src_base_reg, &srcmem, | |
472 true); | |
473 else | |
474 { | |
475 /* Do not update the address if there are no further bytes to move. */ |
476 if (remain_words == 1) | |
477 { | |
478 /* Emit a move instruction if aligned to 4 bytes and only 1 |
479 word to move. */ |
480 if (align_to_4_bytes) | |
481 nds32_emit_mem_move (srcmem, dstmem, SImode, 0); | |
482 else | |
483 { | |
484 tmp_reg = gen_reg_rtx (SImode); | |
485 emit_insn ( | |
486 gen_unaligned_load_w (tmp_reg, | |
487 gen_rtx_MEM (SImode, src_base_reg))); | |
488 emit_insn ( | |
489 gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg), | |
490 tmp_reg)); | |
491 } | |
492 } | |
493 else | |
494 nds32_emit_mem_move_block (start_regno, remain_words, | |
495 &dst_base_reg, &dstmem, | |
496 &src_base_reg, &srcmem, | |
497 false); | |
498 } | |
499 } | |
500 | |
501 switch (remain_bytes) | |
502 { | |
503 case 3: | |
504 case 2: | |
505 { | |
506 if (align_to_2_bytes) | |
507 nds32_emit_mem_move (srcmem, dstmem, HImode, 0); | |
508 else | |
509 { | |
510 nds32_emit_mem_move (srcmem, dstmem, QImode, 0); | |
511 nds32_emit_mem_move (srcmem, dstmem, QImode, 1); | |
512 } | |
513 | |
514 if (remain_bytes == 3) | |
515 nds32_emit_mem_move (srcmem, dstmem, QImode, 2); | |
516 break; | |
517 } | |
518 case 1: | |
519 nds32_emit_mem_move (srcmem, dstmem, QImode, 0); | |
520 break; | |
521 case 0: | |
522 break; | |
523 default: | |
524 gcc_unreachable (); | |
525 } | |
526 | |
527 /* Successfully create patterns, return true. */ | |
528 return true; | |
529 } | |
530 | |
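The byte-count bookkeeping in nds32_expand_movmemsi_unroll above (inst_num, remain_words, remain_bytes) can be checked with plain integer arithmetic. A small sketch, assuming UNITS_PER_WORD == 4; split_copy is a made-up name used only for illustration:

```c
#include <stdio.h>

/* Illustration only: mirror the byte-count split performed by
   nds32_expand_movmemsi_unroll, assuming 4-byte words.  */
static void
split_copy (int total_bytes, int maximum_regs)
{
  int bytes_per_inst = maximum_regs * 4;   /* maximum_bytes_per_inst */
  /* All full lmw/smw pairs except the last one, which must not update
     the base address.  */
  int inst_num = (total_bytes + bytes_per_inst - 1) / bytes_per_inst - 1;
  int remain_bytes = total_bytes - inst_num * bytes_per_inst;
  int remain_words = remain_bytes / 4;
  remain_bytes -= remain_words * 4;

  printf ("%d full pairs, %d trailing words, %d trailing bytes\n",
          inst_num, remain_words, remain_bytes);
}

int
main (void)
{
  split_copy (50, 8);   /* 1 full 32-byte pair, 4 trailing words, 2 bytes */
  split_copy (16, 4);   /* 0 full pairs, 4 trailing words, 0 bytes */
  return 0;
}
```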
531 /* Function to move block memory content by | |
532 using load_multiple and store_multiple. | |
533 This is an auxiliary extern function to help create the rtx template. |
534 Check nds32-multiple.md file for the patterns. */ | |
535 bool | |
536 nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) | |
537 { | |
538 if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment)) | |
539 return true; | |
540 | |
541 if (!optimize_size && optimize > 2) | |
542 return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment); | |
543 | |
544 return false; | |
141 } | 545 } |
142 | 546 |
143 /* ------------------------------------------------------------------------ */ | 547 /* ------------------------------------------------------------------------ */ |
548 | |
549 /* Auxiliary function for expand setmem pattern. */ | |
550 | |
551 static rtx | |
552 nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) | |
553 { | |
554 gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); | |
555 | |
556 if (CONST_INT_P (value)) | |
557 { | |
558 unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode); | |
559 rtx new_val = gen_int_mode (val | (val << 8) | |
560 | (val << 16) | (val << 24), SImode); | |
561 /* Just calculate it here if it's a constant value. */ |
562 emit_move_insn (value4word, new_val); | |
563 } | |
564 else | |
565 { | |
566 if (NDS32_EXT_DSP_P ()) | |
567 { | |
568 /* ! prepare word | |
569 insb $tmp, $value, 1 ! $tmp <- 0x0000abab | |
570 pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ | |
571 rtx tmp = gen_reg_rtx (SImode); | |
572 | |
573 convert_move (tmp, value, true); | |
574 | |
575 emit_insn ( | |
576 gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); | |
577 | |
578 emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); | |
579 } | |
580 else | |
581 { | |
582 /* ! prepare word | |
583 andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab | |
584 slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 | |
585 or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab | |
586 slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 | |
587 or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ | |
588 | |
589 rtx tmp1, tmp2, tmp3, tmp4; | |
590 tmp1 = expand_binop (SImode, and_optab, value, | |
591 gen_int_mode (0xff, SImode), | |
592 NULL_RTX, 0, OPTAB_WIDEN); | |
593 tmp2 = expand_binop (SImode, ashl_optab, tmp1, | |
594 gen_int_mode (8, SImode), | |
595 NULL_RTX, 0, OPTAB_WIDEN); | |
596 tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, | |
597 NULL_RTX, 0, OPTAB_WIDEN); | |
598 tmp4 = expand_binop (SImode, ashl_optab, tmp3, | |
599 gen_int_mode (16, SImode), | |
600 NULL_RTX, 0, OPTAB_WIDEN); | |
601 | |
602 emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); | |
603 } | |
604 } | |
605 | |
606 return value4word; | |
607 } | |
608 | |
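The CONST_INT branch above simply computes the splatted word at expand time. A standalone C sketch of that computation (illustrative only, not part of this file):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustration only: replicate one byte into all four bytes of a word,
   as the CONST_INT branch above does with
   val | (val << 8) | (val << 16) | (val << 24).  */
uint32_t
dup_byte_to_word (uint8_t val)
{
  uint32_t v = val;
  return v | (v << 8) | (v << 16) | (v << 24);
}

int
main (void)
{
  printf ("0x%08x\n", dup_byte_to_word (0xab));   /* prints 0xabababab */
  return 0;
}
```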
609 static rtx | |
610 nds32_gen_dup_4_byte_to_word_value (rtx value) | |
611 { | |
612 rtx value4word = gen_reg_rtx (SImode); | |
613 nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); | |
614 | |
615 return value4word; | |
616 } | |
617 | |
618 static rtx | |
619 nds32_gen_dup_8_byte_to_double_word_value (rtx value) | |
620 { | |
621 rtx value4doubleword = gen_reg_rtx (DImode); | |
622 | |
623 nds32_gen_dup_4_byte_to_word_value_aux ( | |
624 value, nds32_di_low_part_subreg(value4doubleword)); | |
625 | |
626 emit_move_insn (nds32_di_high_part_subreg(value4doubleword), | |
627 nds32_di_low_part_subreg(value4doubleword)); | |
628 return value4doubleword; | |
629 } | |
630 | |
631 | |
632 static rtx | |
633 emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) | |
634 { | |
635 rtx word_mode_label = gen_label_rtx (); | |
636 rtx word_mode_end_label = gen_label_rtx (); | |
637 rtx byte_mode_size = gen_reg_rtx (SImode); | |
638 rtx byte_mode_size_tmp = gen_reg_rtx (SImode); | |
639 rtx word_mode_end = gen_reg_rtx (SImode); | |
640 rtx size_for_word = gen_reg_rtx (SImode); | |
641 | |
642 /* and $size_for_word, $size, #~0x7 */ | |
643 size_for_word = expand_binop (SImode, and_optab, size, | |
644 gen_int_mode (~0x7, SImode), | |
645 NULL_RTX, 0, OPTAB_WIDEN); | |
646 | |
647 emit_move_insn (byte_mode_size, size); | |
648 | |
649 /* beqz $size_for_word, .Lbyte_mode_entry */ | |
650 emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL, | |
651 SImode, 1, word_mode_end_label); | |
652 /* add $word_mode_end, $dst, $size_for_word */ | |
653 word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, | |
654 NULL_RTX, 0, OPTAB_WIDEN); | |
655 | |
656 /* andi $byte_mode_size, $size, 0x7 */ | |
657 byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), | |
658 NULL_RTX, 0, OPTAB_WIDEN); | |
659 | |
660 emit_move_insn (byte_mode_size, byte_mode_size_tmp); | |
661 | |
662 /* .Lword_mode: */ | |
663 emit_label (word_mode_label); | |
664 /* ! word-mode set loop | |
665 smw.bim $value4word, [$dst_itr], $value4word, 0 | |
666 bne $word_mode_end, $dst_itr, .Lword_mode */ | |
667 emit_insn (gen_unaligned_store_update_base_dw (itr, | |
668 itr, | |
669 value)); | |
670 emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, | |
671 Pmode, 1, word_mode_label); | |
672 | |
673 emit_label (word_mode_end_label); | |
674 | |
675 return byte_mode_size; | |
676 } | |
677 | |
678 static rtx | |
679 emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) | |
680 { | |
681 rtx end = gen_reg_rtx (Pmode); | |
682 rtx byte_mode_label = gen_label_rtx (); | |
683 rtx end_label = gen_label_rtx (); | |
684 | |
685 value = force_reg (QImode, value); | |
686 | |
687 if (need_end) | |
688 end = expand_binop (Pmode, add_optab, itr, size, | |
689 NULL_RTX, 0, OPTAB_WIDEN); | |
690 /* beqz $byte_mode_size, .Lend | |
691 add $byte_mode_end, $dst_itr, $byte_mode_size */ | |
692 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL, | |
693 SImode, 1, end_label); | |
694 | |
695 if (!need_end) | |
696 end = expand_binop (Pmode, add_optab, itr, size, | |
697 NULL_RTX, 0, OPTAB_WIDEN); | |
698 | |
699 /* .Lbyte_mode: */ | |
700 emit_label (byte_mode_label); | |
701 | |
702 /* ! byte-mode set loop | |
703 sbi.bi $value, [$dst_itr] ,1 | |
704 bne $byte_mode_end, $dst_itr, .Lbyte_mode */ | |
705 nds32_emit_post_inc_load_store (value, itr, QImode, false); | |
706 | |
707 emit_cmp_and_jump_insns (end, itr, NE, NULL, | |
708 Pmode, 1, byte_mode_label); | |
709 /* .Lend: */ | |
710 emit_label (end_label); | |
711 | |
712 if (need_end) | |
713 return end; | |
714 else | |
715 return NULL_RTX; | |
716 } | |
717 | |
718 static bool | |
719 nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) | |
720 { | |
721 rtx value4doubleword; | |
722 rtx value4byte; | |
723 rtx dst; | |
724 rtx byte_mode_size; | |
725 | |
726 /* Emit loop version of setmem. | |
727 memset: | |
728 ! prepare word | |
729 andi $tmp1, $val, 0xff ! $tmp1 <- 0x000000ab | |
730 slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 | |
731 or $tmp3, $val, $tmp2 ! $tmp3 <- 0x0000abab | |
732 slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 | |
733 or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab | |
734 | |
735 and $size_for_word, $size, #-4 | |
736 beqz $size_for_word, .Lword_mode_end | |
737 | |
738 add $word_mode_end, $dst, $size_for_word | |
739 andi $byte_mode_size, $size, 3 | |
740 | |
741 .Lword_mode: | |
742 ! word-mode set loop | |
743 smw.bim $value4word, [$dst], $value4word, 0 | |
744 bne $word_mode_end, $dst, .Lword_mode | |
745 | |
746 .Lword_mode_end: | |
747 beqz $byte_mode_size, .Lend | |
748 add $byte_mode_end, $dst, $byte_mode_size | |
749 | |
750 .Lbyte_mode: | |
751 ! byte-mode set loop | |
752 sbi.bi $value4word, [$dst] ,1 | |
753 bne $byte_mode_end, $dst, .Lbyte_mode | |
754 .Lend: */ | |
755 | |
756 dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); | |
757 | |
758 /* ! prepare word | |
759 andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab | |
760 slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 | |
761 or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab | |
762 slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 | |
763 or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ | |
764 value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); | |
765 | |
766 /* and $size_for_word, $size, #-4 | |
767 beqz $size_for_word, .Lword_mode_end | |
768 | |
769 add $word_mode_end, $dst, $size_for_word | |
770 andi $byte_mode_size, $size, 3 | |
771 | |
772 .Lword_mode: | |
773 ! word-mode set loop | |
774 smw.bim $value4word, [$dst], $value4word, 0 | |
775 bne $word_mode_end, $dst, .Lword_mode | |
776 .Lword_mode_end: */ | |
777 byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); | |
778 | |
779 /* beqz $byte_mode_size, .Lend | |
780 add $byte_mode_end, $dst, $byte_mode_size | |
781 | |
782 .Lbyte_mode: | |
783 ! byte-mode set loop | |
784 sbi.bi $value, [$dst] ,1 | |
785 bne $byte_mode_end, $dst, .Lbyte_mode | |
786 .Lend: */ | |
787 | |
788 value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, | |
789 subreg_lowpart_offset (QImode, DImode)); | |
790 | |
791 emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); | |
792 | |
793 return true; | |
794 } | |
795 | |
796 static bool | |
797 nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) | |
798 { | |
799 rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); | |
800 rtx need_align_bytes = gen_reg_rtx (SImode); | |
801 rtx last_2_bit = gen_reg_rtx (SImode); | |
802 rtx byte_loop_base = gen_reg_rtx (SImode); | |
803 rtx byte_loop_size = gen_reg_rtx (SImode); | |
804 rtx remain_size = gen_reg_rtx (SImode); | |
805 rtx new_base_reg; | |
806 rtx value4byte, value4doubleword; | |
807 rtx byte_mode_size; | |
808 rtx last_byte_loop_label = gen_label_rtx (); | |
809 | |
810 size = force_reg (SImode, size); | |
811 | |
812 value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); | |
813 value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, | |
814 subreg_lowpart_offset (QImode, DImode)); | |
815 | |
816 emit_move_insn (byte_loop_size, size); | |
817 emit_move_insn (byte_loop_base, base_reg); | |
818 | |
819 /* Jump to last byte loop if size is less than 16. */ | |
820 emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL, | |
821 SImode, 1, last_byte_loop_label); | |
822 | |
823 /* Make sure the address is aligned to 4 bytes first, since v3m can't do unaligned access. */ |
824 emit_insn (gen_andsi3 (last_2_bit, | |
825 base_reg, | |
826 gen_int_mode (0x3, SImode))); | |
827 | |
828 emit_insn (gen_subsi3 (need_align_bytes, | |
829 gen_int_mode (4, SImode), | |
830 last_2_bit)); | |
831 | |
832 /* Align to 4 byte. */ | |
833 new_base_reg = emit_setmem_byte_loop (base_reg, | |
834 need_align_bytes, | |
835 value4byte, | |
836 true); | |
837 | |
838 /* Calculate remain size. */ | |
839 emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); | |
840 | |
841 /* Set memory word by word. */ | |
842 byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, | |
843 remain_size, | |
844 value4doubleword); | |
845 | |
846 emit_move_insn (byte_loop_base, new_base_reg); | |
847 emit_move_insn (byte_loop_size, byte_mode_size); | |
848 | |
849 emit_label (last_byte_loop_label); | |
850 | |
851 /* And set memory for remain bytes. */ | |
852 emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false); | |
853 return true; | |
854 } | |
855 | |
856 static bool | |
857 nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value, | |
858 rtx align ATTRIBUTE_UNUSED, | |
859 rtx expected_align ATTRIBUTE_UNUSED, | |
860 rtx expected_size ATTRIBUTE_UNUSED) | |
861 { | |
862 unsigned maximum_regs, maximum_bytes, start_regno, regno; | |
863 rtx value4word; | |
864 rtx dst_base_reg, new_base_reg; | |
865 unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw; | |
866 unsigned HOST_WIDE_INT real_size; | |
867 | |
868 if (TARGET_REDUCED_REGS) | |
869 { | |
870 maximum_regs = 4; | |
871 maximum_bytes = 64; | |
872 start_regno = 2; | |
873 } | |
874 else | |
875 { | |
876 maximum_regs = 8; | |
877 maximum_bytes = 128; | |
878 start_regno = 16; | |
879 } | |
880 | |
881 real_size = UINTVAL (size) & GET_MODE_MASK(SImode); | |
882 | |
883 if (!(CONST_INT_P (size) && real_size <= maximum_bytes)) | |
884 return false; | |
885 | |
886 remain_bytes = real_size; | |
887 | |
888 gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); | |
889 | |
890 value4word = nds32_gen_dup_4_byte_to_word_value (value); | |
891 | |
892 prepare_regs = remain_bytes / UNITS_PER_WORD; | |
893 | |
894 dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); | |
895 | |
896 if (prepare_regs > maximum_regs) | |
897 prepare_regs = maximum_regs; | |
898 | |
899 fill_per_smw = prepare_regs * UNITS_PER_WORD; | |
900 | |
901 regno = start_regno; | |
902 switch (prepare_regs) | |
903 { | |
904 case 2: | |
905 default: | |
906 { | |
907 rtx reg0 = gen_rtx_REG (SImode, regno); | |
908 rtx reg1 = gen_rtx_REG (SImode, regno+1); | |
909 unsigned last_regno = start_regno + prepare_regs - 1; | |
910 | |
911 emit_move_insn (reg0, value4word); | |
912 emit_move_insn (reg1, value4word); | |
913 rtx regd = gen_rtx_REG (DImode, regno); | |
914 regno += 2; | |
915 | |
916 /* Try to utilize movd44! */ | |
917 while (regno <= last_regno) | |
918 { | |
919 if ((regno + 1) <=last_regno) | |
920 { | |
921 rtx reg = gen_rtx_REG (DImode, regno); | |
922 emit_move_insn (reg, regd); | |
923 regno += 2; | |
924 } | |
925 else | |
926 { | |
927 rtx reg = gen_rtx_REG (SImode, regno); | |
928 emit_move_insn (reg, reg0); | |
929 regno += 1; | |
930 } | |
931 } | |
932 break; | |
933 } | |
934 case 1: | |
935 { | |
936 rtx reg = gen_rtx_REG (SImode, regno++); | |
937 emit_move_insn (reg, value4word); | |
938 } | |
939 break; | |
940 case 0: | |
941 break; | |
942 } | |
943 | |
944 if (fill_per_smw) | |
945 for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw) | |
946 { | |
947 emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs, | |
948 dst_base_reg, dstmem, | |
949 true, &new_base_reg)); | |
950 dst_base_reg = new_base_reg; | |
951 dstmem = gen_rtx_MEM (SImode, dst_base_reg); | |
952 } | |
953 | |
954 remain_words = remain_bytes / UNITS_PER_WORD; | |
955 | |
956 if (remain_words) | |
957 { | |
958 emit_insn (nds32_expand_store_multiple (start_regno, remain_words, | |
959 dst_base_reg, dstmem, | |
960 true, &new_base_reg)); | |
961 dst_base_reg = new_base_reg; | |
962 dstmem = gen_rtx_MEM (SImode, dst_base_reg); | |
963 } | |
964 | |
965 remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); | |
966 | |
967 if (remain_bytes) | |
968 { | |
969 value = simplify_gen_subreg (QImode, value4word, SImode, | |
970 subreg_lowpart_offset(QImode, SImode)); | |
971 int offset = 0; | |
972 for (;remain_bytes;--remain_bytes, ++offset) | |
973 { | |
974 nds32_emit_load_store (value, dstmem, QImode, offset, false); | |
975 } | |
976 } | |
977 | |
978 return true; | |
979 } | |
980 | |
981 bool | |
982 nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align, | |
983 rtx expected_align, | |
984 rtx expected_size) | |
985 { | |
986 bool align_to_4_bytes = (INTVAL (align) & 3) == 0; | |
987 | |
988 /* Only expand at -O3. */ |
989 if (optimize_size || optimize < 3) | |
990 return false; | |
991 | |
992 if (TARGET_ISA_V3M && !align_to_4_bytes) | |
993 return nds32_expand_setmem_loop_v3m (dstmem, size, value); | |
994 | |
995 if (nds32_expand_setmem_unroll (dstmem, size, value, | |
996 align, expected_align, expected_size)) | |
997 return true; | |
998 | |
999 return nds32_expand_setmem_loop (dstmem, size, value); | |
1000 } | |
1001 | |
1002 /* ------------------------------------------------------------------------ */ | |
1003 | |
1004 /* Auxiliary function for expand strlen pattern. */ | |
1005 | |
1006 bool | |
1007 nds32_expand_strlen (rtx result, rtx str, | |
1008 rtx target_char, rtx align ATTRIBUTE_UNUSED) | |
1009 { | |
1010 rtx base_reg, backup_base_reg; | |
1011 rtx ffb_result; | |
1012 rtx target_char_ptr, length; | |
1013 rtx loop_label, tmp; | |
1014 | |
1015 if (optimize_size || optimize < 3) | |
1016 return false; | |
1017 | |
1018 gcc_assert (MEM_P (str)); | |
1019 gcc_assert (CONST_INT_P (target_char) || REG_P (target_char)); | |
1020 | |
1021 base_reg = copy_to_mode_reg (SImode, XEXP (str, 0)); | |
1022 loop_label = gen_label_rtx (); | |
1023 | |
1024 ffb_result = gen_reg_rtx (Pmode); | |
1025 tmp = gen_reg_rtx (SImode); | |
1026 backup_base_reg = gen_reg_rtx (SImode); | |
1027 | |
1028 /* Emit loop version of strlen. | |
1029 move $backup_base, $base | |
1030 .Lloop: | |
1031 lmw.bim $tmp, [$base], $tmp, 0 | |
1032 ffb $ffb_result, $tmp, $target_char ! is there $target_char? | |
1033 beqz $ffb_result, .Lloop | |
1034 add $last_char_ptr, $base, $ffb_result | |
1035 sub $length, $last_char_ptr, $backup_base */ | |
1036 | |
1037 /* move $backup_base, $base */ | |
1038 emit_move_insn (backup_base_reg, base_reg); | |
1039 | |
1040 /* .Lloop: */ | |
1041 emit_label (loop_label); | |
1042 /* lmw.bim $tmp, [$base], $tmp, 0 */ | |
1043 emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg)); | |
1044 | |
1045 /* ffb $ffb_result, $tmp, $target_char ! is there $target_char? */ | |
1046 emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char)); | |
1047 | |
1048 /* beqz $ffb_result, .Lloop */ | |
1049 emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL, | |
1050 SImode, 1, loop_label); | |
1051 | |
1052 /* add $target_char_ptr, $base, $ffb_result */ | |
1053 target_char_ptr = expand_binop (Pmode, add_optab, base_reg, | |
1054 ffb_result, NULL_RTX, 0, OPTAB_WIDEN); | |
1055 | |
1056 /* sub $length, $target_char_ptr, $backup_base */ | |
1057 length = expand_binop (Pmode, sub_optab, target_char_ptr, | |
1058 backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN); | |
1059 | |
1060 emit_move_insn (result, length); | |
1061 | |
1062 return true; | |
1063 } | |
1064 | |
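A plain C model of the scan that nds32_expand_strlen emits (illustrative only; the exact result encoding of the ffb instruction is not modeled here, only the net effect of finding the first occurrence of the target character word by word):

```c
#include <stddef.h>

/* Illustration only: C model of the word-at-a-time scan emitted by
   nds32_expand_strlen.  Each 4-byte word is loaded and searched for the
   target character (the job of the ffb instruction); the result is the
   distance from the start of the string to the first match.  */
size_t
model_strlen_scan (const unsigned char *str, unsigned char target)
{
  const unsigned char *base = str;   /* move $backup_base, $base */
  const unsigned char *itr = str;    /* $base, advanced by lmw.bim */

  for (;;)
    {
      for (int i = 0; i < 4; i++)    /* ffb $ffb_result, $tmp, $target_char */
        if (itr[i] == target)
          return (size_t) (itr + i - base);  /* add + sub -> $length */
      itr += 4;                      /* post-increment from lmw.bim */
    }
}
```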
1065 /* ------------------------------------------------------------------------ */ | |
1066 | |
1067 /* Functions to expand load_multiple and store_multiple. | |
1068 They are auxiliary extern functions to help create rtx templates. |
1069 Check nds32-multiple.md file for the patterns. */ | |
1070 rtx | |
1071 nds32_expand_load_multiple (int base_regno, int count, | |
1072 rtx base_addr, rtx basemem, | |
1073 bool update_base_reg_p, | |
1074 rtx *update_base_reg) | |
1075 { | |
1076 int par_index; | |
1077 int offset; | |
1078 int start_idx; | |
1079 rtx result; | |
1080 rtx new_addr, mem, reg; | |
1081 | |
1082 /* Generate an unaligned load to prevent the load instruction being pulled |
1083 out of the parallel, which would generate lwi and lose the unaligned access. */ |
1084 if (count == 1) | |
1085 { | |
1086 reg = gen_rtx_REG (SImode, base_regno); | |
1087 if (update_base_reg_p) | |
1088 { | |
1089 *update_base_reg = gen_reg_rtx (SImode); | |
1090 return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr); | |
1091 } | |
1092 else | |
1093 return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr)); | |
1094 } | |
1095 | |
1096 /* Create the pattern that is presented in nds32-multiple.md. */ | |
1097 if (update_base_reg_p) | |
1098 { | |
1099 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); | |
1100 start_idx = 1; | |
1101 } | |
1102 else | |
1103 { | |
1104 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); | |
1105 start_idx = 0; | |
1106 } | |
1107 | |
1108 if (update_base_reg_p) | |
1109 { | |
1110 offset = count * 4; | |
1111 new_addr = plus_constant (Pmode, base_addr, offset); | |
1112 *update_base_reg = gen_reg_rtx (SImode); | |
1113 | |
1114 XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); | |
1115 } | |
1116 | |
1117 for (par_index = 0; par_index < count; par_index++) | |
1118 { | |
1119 offset = par_index * 4; | |
1120 /* 4-byte for loading data to each register. */ | |
1121 new_addr = plus_constant (Pmode, base_addr, offset); | |
1122 mem = adjust_automodify_address_nv (basemem, SImode, | |
1123 new_addr, offset); | |
1124 reg = gen_rtx_REG (SImode, base_regno + par_index); | |
1125 | |
1126 XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem); | |
1127 } | |
1128 | |
1129 return result; | |
1130 } | |
1131 | |
1132 rtx | |
1133 nds32_expand_store_multiple (int base_regno, int count, | |
1134 rtx base_addr, rtx basemem, | |
1135 bool update_base_reg_p, | |
1136 rtx *update_base_reg) | |
1137 { | |
1138 int par_index; | |
1139 int offset; | |
1140 int start_idx; | |
1141 rtx result; | |
1142 rtx new_addr, mem, reg; | |
1143 | |
1144 if (count == 1) | |
1145 { | |
1146 reg = gen_rtx_REG (SImode, base_regno); | |
1147 if (update_base_reg_p) | |
1148 { | |
1149 *update_base_reg = gen_reg_rtx (SImode); | |
1150 return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg); | |
1151 } | |
1152 else | |
1153 return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg); | |
1154 } | |
1155 | |
1156 /* Create the pattern that is presented in nds32-multiple.md. */ | |
1157 | |
1158 if (update_base_reg_p) | |
1159 { | |
1160 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); | |
1161 start_idx = 1; | |
1162 } | |
1163 else | |
1164 { | |
1165 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); | |
1166 start_idx = 0; | |
1167 } | |
1168 | |
1169 if (update_base_reg_p) | |
1170 { | |
1171 offset = count * 4; | |
1172 new_addr = plus_constant (Pmode, base_addr, offset); | |
1173 *update_base_reg = gen_reg_rtx (SImode); | |
1174 | |
1175 XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); | |
1176 } | |
1177 | |
1178 for (par_index = 0; par_index < count; par_index++) | |
1179 { | |
1180 offset = par_index * 4; | |
1181 /* 4-byte for storing data to memory. */ | |
1182 new_addr = plus_constant (Pmode, base_addr, offset); | |
1183 mem = adjust_automodify_address_nv (basemem, SImode, | |
1184 new_addr, offset); | |
1185 reg = gen_rtx_REG (SImode, base_regno + par_index); | |
1186 | |
1187 XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg); | |
1188 } | |
1189 | |
1190 return result; | |
1191 } | |
1192 | |
1193 /* ------------------------------------------------------------------------ */ |