111
|
1 /* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple
|
|
2 and store_multiple pattern of Andes NDS32 cpu for GNU compiler
|
131
|
3 Copyright (C) 2012-2018 Free Software Foundation, Inc.
|
111
|
4 Contributed by Andes Technology Corporation.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published
|
|
10 by the Free Software Foundation; either version 3, or (at your
|
|
11 option) any later version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
16 License for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 /* ------------------------------------------------------------------------ */
|
|
23
|
131
|
24 #define IN_TARGET_CODE 1
|
|
25
|
111
|
26 #include "config.h"
|
|
27 #include "system.h"
|
|
28 #include "coretypes.h"
|
|
29 #include "backend.h"
|
|
30 #include "target.h"
|
|
31 #include "rtl.h"
|
|
32 #include "memmodel.h"
|
|
33 #include "emit-rtl.h"
|
|
34 #include "explow.h"
|
131
|
35 #include "tree.h"
|
|
36 #include "expr.h"
|
|
37 #include "optabs.h"
|
|
38 #include "nds32-protos.h"
|
|
39
|
|
40 /* ------------------------------------------------------------------------ */
|
|
41
|
|
42 /* Auxiliary static function definitions. */
|
|
43
|
|
44 static void
|
|
45 nds32_emit_load_store (rtx reg, rtx mem,
|
|
46 enum machine_mode mode,
|
|
47 int offset, bool load_p)
|
|
48 {
|
|
49 rtx new_mem;
|
|
50 new_mem = adjust_address (mem, mode, offset);
|
|
51 if (load_p)
|
|
52 emit_move_insn (reg, new_mem);
|
|
53 else
|
|
54 emit_move_insn (new_mem, reg);
|
|
55 }
|
|
56
|
|
57 static void
|
|
58 nds32_emit_post_inc_load_store (rtx reg, rtx base_reg,
|
|
59 enum machine_mode mode,
|
|
60 bool load_p)
|
|
61 {
|
|
62 gcc_assert (GET_MODE (reg) == mode);
|
|
63 gcc_assert (GET_MODE (base_reg) == Pmode);
|
|
64
|
|
65 /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it may
|
|
66 not recognize by gcc, so let gcc combine it at auto_inc_dec pass. */
|
|
67 if (load_p)
|
|
68 emit_move_insn (reg,
|
|
69 gen_rtx_MEM (mode,
|
|
70 base_reg));
|
|
71 else
|
|
72 emit_move_insn (gen_rtx_MEM (mode,
|
|
73 base_reg),
|
|
74 reg);
|
|
75
|
|
76 emit_move_insn (base_reg,
|
|
77 plus_constant(Pmode, base_reg, GET_MODE_SIZE (mode)));
|
|
78 }
|
|
79
|
|
80 static void
|
|
81 nds32_emit_mem_move (rtx src, rtx dst,
|
|
82 enum machine_mode mode,
|
|
83 int addr_offset)
|
|
84 {
|
|
85 gcc_assert (MEM_P (src) && MEM_P (dst));
|
|
86 rtx tmp_reg = gen_reg_rtx (mode);
|
|
87 nds32_emit_load_store (tmp_reg, src, mode,
|
|
88 addr_offset, /* load_p */ true);
|
|
89 nds32_emit_load_store (tmp_reg, dst, mode,
|
|
90 addr_offset, /* load_p */ false);
|
|
91 }
|
|
92
|
|
/* Copy COUNT words from *SRC_MEM to *DST_MEM using one load-multiple /
   store-multiple pair, clobbering registers BASE_REGNO ..
   BASE_REGNO + COUNT - 1 as scratch.  When UPDATE_BASE_REG_P is true the
   lmw/smw patterns post-update their base registers, and the caller's
   base-register and MEM rtxes are rewritten in place so that they address
   the byte just past the copied block.  */
static void
nds32_emit_mem_move_block (int base_regno, int count,
			   rtx *dst_base_reg, rtx *dst_mem,
			   rtx *src_base_reg, rtx *src_mem,
			   bool update_base_reg_p)
{
  rtx new_base_reg;

  /* Load COUNT words into the scratch register block.  */
  emit_insn (nds32_expand_load_multiple (base_regno, count,
					 *src_base_reg, *src_mem,
					 update_base_reg_p, &new_base_reg));
  if (update_base_reg_p)
    {
      /* Propagate the post-updated source address back to the caller.  */
      *src_base_reg = new_base_reg;
      *src_mem = gen_rtx_MEM (SImode, *src_base_reg);
    }

  /* Store the same register block to the destination.  */
  emit_insn (nds32_expand_store_multiple (base_regno, count,
					  *dst_base_reg, *dst_mem,
					  update_base_reg_p, &new_base_reg));

  if (update_base_reg_p)
    {
      /* Propagate the post-updated destination address as well.  */
      *dst_base_reg = new_base_reg;
      *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg);
    }
}
|
|
120
|
|
121 /* ------------------------------------------------------------------------ */
|
|
122
|
|
123 /* Auxiliary function for expand movmem pattern. */
|
|
124
|
|
125 static bool
|
|
126 nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem,
|
|
127 rtx size,
|
|
128 rtx alignment)
|
|
129 {
|
|
130 /* Emit loop version of movmem.
|
|
131
|
|
132 andi $size_least_3_bit, $size, #~7
|
|
133 add $dst_end, $dst, $size
|
|
134 move $dst_itr, $dst
|
|
135 move $src_itr, $src
|
|
136 beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.
|
|
137 add $double_word_end, $dst, $size_least_3_bit
|
|
138
|
|
139 .Ldouble_word_mode_loop:
|
|
140 lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
|
141 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
|
|
142 ! move will delete after register allocation
|
|
143 move $src_itr, $src_itr'
|
|
144 move $dst_itr, $dst_itr'
|
|
145 ! Not readch upper bound. Loop.
|
|
146 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop
|
|
147
|
|
148 .Lbyte_mode_entry:
|
|
149 beq $dst_itr, $dst_end, .Lend_label
|
|
150 .Lbyte_mode_loop:
|
|
151 lbi.bi $tmp, [$src_itr], #1
|
|
152 sbi.bi $tmp, [$dst_itr], #1
|
|
153 ! Not readch upper bound. Loop.
|
|
154 bne $dst_itr, $dst_end, .Lbyte_mode_loop
|
|
155 .Lend_label:
|
|
156 */
|
|
157 rtx dst_base_reg, src_base_reg;
|
|
158 rtx dst_itr, src_itr;
|
|
159 rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
|
|
160 rtx dst_end;
|
|
161 rtx size_least_3_bit;
|
|
162 rtx double_word_end;
|
|
163 rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label;
|
|
164 rtx tmp;
|
|
165 rtx mask_least_3_bit;
|
|
166 int start_regno;
|
|
167 bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
|
|
168
|
|
169 if (TARGET_ISA_V3M && !align_to_4_bytes)
|
|
170 return 0;
|
|
171
|
|
172 if (TARGET_REDUCED_REGS)
|
|
173 start_regno = 2;
|
|
174 else
|
|
175 start_regno = 16;
|
|
176
|
|
177 dst_itr = gen_reg_rtx (Pmode);
|
|
178 src_itr = gen_reg_rtx (Pmode);
|
|
179 dst_end = gen_reg_rtx (Pmode);
|
|
180 tmp = gen_reg_rtx (QImode);
|
|
181 mask_least_3_bit = GEN_INT (~7);
|
|
182
|
|
183 double_word_mode_loop = gen_label_rtx ();
|
|
184 byte_mode_entry = gen_label_rtx ();
|
|
185 byte_mode_loop = gen_label_rtx ();
|
|
186 end_label = gen_label_rtx ();
|
|
187
|
|
188 dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
|
|
189 src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
|
|
190 /* andi $size_least_3_bit, $size, #~7 */
|
|
191 size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit,
|
|
192 NULL_RTX, 0, OPTAB_WIDEN);
|
|
193 /* add $dst_end, $dst, $size */
|
|
194 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
|
|
195 NULL_RTX, 0, OPTAB_WIDEN);
|
|
196
|
|
197 /* move $dst_itr, $dst
|
|
198 move $src_itr, $src */
|
|
199 emit_move_insn (dst_itr, dst_base_reg);
|
|
200 emit_move_insn (src_itr, src_base_reg);
|
|
201
|
|
202 /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */
|
|
203 emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL,
|
|
204 SImode, 1, byte_mode_entry);
|
|
205 /* add $double_word_end, $dst, $size_least_3_bit */
|
|
206 double_word_end = expand_binop (Pmode, add_optab,
|
|
207 dst_base_reg, size_least_3_bit,
|
|
208 NULL_RTX, 0, OPTAB_WIDEN);
|
|
209
|
|
210 /* .Ldouble_word_mode_loop: */
|
|
211 emit_label (double_word_mode_loop);
|
|
212 /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
|
213 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
|
|
214 src_itr_m = src_itr;
|
|
215 dst_itr_m = dst_itr;
|
|
216 srcmem_m = srcmem;
|
|
217 dstmem_m = dstmem;
|
|
218 nds32_emit_mem_move_block (start_regno, 2,
|
|
219 &dst_itr_m, &dstmem_m,
|
|
220 &src_itr_m, &srcmem_m,
|
|
221 true);
|
|
222 /* move $src_itr, $src_itr'
|
|
223 move $dst_itr, $dst_itr' */
|
|
224 emit_move_insn (dst_itr, dst_itr_m);
|
|
225 emit_move_insn (src_itr, src_itr_m);
|
|
226
|
|
227 /* ! Not readch upper bound. Loop.
|
|
228 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
|
|
229 emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL,
|
|
230 Pmode, 1, double_word_mode_loop);
|
|
231 /* .Lbyte_mode_entry: */
|
|
232 emit_label (byte_mode_entry);
|
|
233
|
|
234 /* beq $dst_itr, $dst_end, .Lend_label */
|
|
235 emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL,
|
|
236 Pmode, 1, end_label);
|
|
237 /* .Lbyte_mode_loop: */
|
|
238 emit_label (byte_mode_loop);
|
|
239
|
|
240 /* lbi.bi $tmp, [$src_itr], #1 */
|
|
241 nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
|
|
242
|
|
243 /* sbi.bi $tmp, [$dst_itr], #1 */
|
|
244 nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
|
|
245 /* ! Not readch upper bound. Loop.
|
|
246 bne $dst_itr, $dst_end, .Lbyte_mode_loop */
|
|
247 emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
|
|
248 SImode, 1, byte_mode_loop);
|
|
249
|
|
250 /* .Lend_label: */
|
|
251 emit_label (end_label);
|
|
252
|
|
253 return true;
|
|
254 }
|
|
255
|
|
256 static bool
|
|
257 nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
|
|
258 rtx size, rtx alignment)
|
|
259 {
|
|
260 rtx dst_base_reg, src_base_reg;
|
|
261 rtx dst_itr, src_itr;
|
|
262 rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
|
|
263 rtx dst_end;
|
|
264 rtx double_word_mode_loop, byte_mode_loop;
|
|
265 rtx tmp;
|
|
266 int start_regno;
|
|
267 bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
|
|
268 unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
|
|
269
|
|
270 if (TARGET_ISA_V3M && !align_to_4_bytes)
|
|
271 return 0;
|
|
272
|
|
273 if (TARGET_REDUCED_REGS)
|
|
274 start_regno = 2;
|
|
275 else
|
|
276 start_regno = 16;
|
|
277
|
|
278 dst_itr = gen_reg_rtx (Pmode);
|
|
279 src_itr = gen_reg_rtx (Pmode);
|
|
280 dst_end = gen_reg_rtx (Pmode);
|
|
281 tmp = gen_reg_rtx (QImode);
|
|
282
|
|
283 double_word_mode_loop = gen_label_rtx ();
|
|
284 byte_mode_loop = gen_label_rtx ();
|
|
285
|
|
286 dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
|
|
287 src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
|
|
288
|
|
289 if (total_bytes < 8)
|
|
290 {
|
|
291 /* Emit total_bytes less than 8 loop version of movmem.
|
|
292 add $dst_end, $dst, $size
|
|
293 move $dst_itr, $dst
|
|
294 .Lbyte_mode_loop:
|
|
295 lbi.bi $tmp, [$src_itr], #1
|
|
296 sbi.bi $tmp, [$dst_itr], #1
|
|
297 ! Not readch upper bound. Loop.
|
|
298 bne $dst_itr, $dst_end, .Lbyte_mode_loop */
|
|
299
|
|
300 /* add $dst_end, $dst, $size */
|
|
301 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
|
|
302 NULL_RTX, 0, OPTAB_WIDEN);
|
|
303 /* move $dst_itr, $dst
|
|
304 move $src_itr, $src */
|
|
305 emit_move_insn (dst_itr, dst_base_reg);
|
|
306 emit_move_insn (src_itr, src_base_reg);
|
|
307
|
|
308 /* .Lbyte_mode_loop: */
|
|
309 emit_label (byte_mode_loop);
|
|
310
|
|
311 /* lbi.bi $tmp, [$src_itr], #1 */
|
|
312 nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
|
|
313
|
|
314 /* sbi.bi $tmp, [$dst_itr], #1 */
|
|
315 nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
|
|
316 /* ! Not readch upper bound. Loop.
|
|
317 bne $dst_itr, $dst_end, .Lbyte_mode_loop */
|
|
318 emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
|
|
319 SImode, 1, byte_mode_loop);
|
|
320 return true;
|
|
321 }
|
|
322 else if (total_bytes % 8 == 0)
|
|
323 {
|
|
324 /* Emit multiple of 8 loop version of movmem.
|
|
325
|
|
326 add $dst_end, $dst, $size
|
|
327 move $dst_itr, $dst
|
|
328 move $src_itr, $src
|
|
329
|
|
330 .Ldouble_word_mode_loop:
|
|
331 lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
|
332 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
|
|
333 ! move will delete after register allocation
|
|
334 move $src_itr, $src_itr'
|
|
335 move $dst_itr, $dst_itr'
|
|
336 ! Not readch upper bound. Loop.
|
|
337 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
|
|
338
|
|
339 /* add $dst_end, $dst, $size */
|
|
340 dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
|
|
341 NULL_RTX, 0, OPTAB_WIDEN);
|
|
342
|
|
343 /* move $dst_itr, $dst
|
|
344 move $src_itr, $src */
|
|
345 emit_move_insn (dst_itr, dst_base_reg);
|
|
346 emit_move_insn (src_itr, src_base_reg);
|
|
347
|
|
348 /* .Ldouble_word_mode_loop: */
|
|
349 emit_label (double_word_mode_loop);
|
|
350 /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
|
351 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
|
|
352 src_itr_m = src_itr;
|
|
353 dst_itr_m = dst_itr;
|
|
354 srcmem_m = srcmem;
|
|
355 dstmem_m = dstmem;
|
|
356 nds32_emit_mem_move_block (start_regno, 2,
|
|
357 &dst_itr_m, &dstmem_m,
|
|
358 &src_itr_m, &srcmem_m,
|
|
359 true);
|
|
360 /* move $src_itr, $src_itr'
|
|
361 move $dst_itr, $dst_itr' */
|
|
362 emit_move_insn (dst_itr, dst_itr_m);
|
|
363 emit_move_insn (src_itr, src_itr_m);
|
|
364
|
|
365 /* ! Not readch upper bound. Loop.
|
|
366 bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
|
|
367 emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
|
|
368 Pmode, 1, double_word_mode_loop);
|
|
369 }
|
|
370 else
|
|
371 {
|
|
372 /* Handle size greater than 8, and not a multiple of 8. */
|
|
373 return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
|
|
374 size, alignment);
|
|
375 }
|
|
376
|
|
377 return true;
|
|
378 }
|
|
379
|
|
380 static bool
|
|
381 nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem,
|
|
382 rtx size, rtx alignment)
|
|
383 {
|
|
384 if (CONST_INT_P (size))
|
|
385 return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem,
|
|
386 size, alignment);
|
|
387 else
|
|
388 return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
|
|
389 size, alignment);
|
|
390 }
|
|
391
|
|
/* Try to expand movmem as fully unrolled lmw/smw (load/store-multiple)
   pairs, with no loop.  TOTAL_BYTES and ALIGNMENT must be CONST_INTs.
   Return true when patterns were emitted, false when the caller must
   fall back (non-constant operands, size above the register-pressure
   limit, volatile operands, or unaligned copy on V3M).  */
static bool
nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem,
			      rtx total_bytes, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx tmp_reg;
  int maximum_bytes;
  int maximum_bytes_per_inst;
  int maximum_regs;
  int start_regno;
  int i, inst_num;
  HOST_WIDE_INT remain_bytes, remain_words;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0;

  /* Because the reduced register set has few registers
     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31'
     cannot be used for register allocation),
     using 8 registers (32 bytes) for moving memory block
     may easily consume all of them.
     It makes register allocation/spilling hard to work.
     So we only allow maximum=4 registers (16 bytes) for
     moving memory block under reduced-set registers.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs = 4;
      maximum_bytes = 64;
      start_regno = 2;
    }
  else
    {
      /* $r25 is $tp so we use up to 8 registers.  */
      maximum_regs = 8;
      maximum_bytes = 160;
      start_regno = 16;
    }
  maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD;

  /* 1. Total_bytes is integer for sure.
     2. Alignment is integer for sure.
     3. Maximum 4 or 8 registers per instruction, up to 64 or 160 bytes
	total (4 regs * 4 bytes * 4 insns = 64; 8 * 4 * 5 = 160).
     4. The dstmem cannot be volatile memory access.
     5. The srcmem cannot be volatile memory access.
     6. Known shared alignment not aligned to 4 bytes is refused on v3m
	since lmw/smw do *NOT* support unaligned access there.  */
  if (GET_CODE (total_bytes) != CONST_INT
      || GET_CODE (alignment) != CONST_INT
      || INTVAL (total_bytes) > maximum_bytes
      || MEM_VOLATILE_P (dstmem)
      || MEM_VOLATILE_P (srcmem)
      || (TARGET_ISA_V3M && !align_to_4_bytes))
    return false;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
  remain_bytes = INTVAL (total_bytes);

  /* Do not update base address for last lmw/smw pair.  */
  inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1))
	      / maximum_bytes_per_inst) - 1;

  /* Full-width lmw/smw pairs, each moving maximum_regs words and
     post-updating both base registers.  */
  for (i = 0; i < inst_num; i++)
    {
      nds32_emit_mem_move_block (start_regno, maximum_regs,
				 &dst_base_reg, &dstmem,
				 &src_base_reg, &srcmem,
				 true);
    }
  remain_bytes -= maximum_bytes_per_inst * inst_num;

  remain_words = remain_bytes / UNITS_PER_WORD;
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_words != 0)
    {
      if (remain_bytes != 0)
	/* Trailing bytes still follow, so the base registers must be
	   post-updated to address them afterwards.  */
	nds32_emit_mem_move_block (start_regno, remain_words,
				   &dst_base_reg, &dstmem,
				   &src_base_reg, &srcmem,
				   true);
      else
	{
	  /* Do not update address if no further byte to move.  */
	  if (remain_words == 1)
	    {
	      /* Emit a plain move instruction if aligned to 4 bytes and
		 only 1 word to move.  */
	      if (align_to_4_bytes)
		nds32_emit_mem_move (srcmem, dstmem, SImode, 0);
	      else
		{
		  /* Unaligned single word: use the dedicated
		     unaligned load/store word patterns.  */
		  tmp_reg = gen_reg_rtx (SImode);
		  emit_insn (
		    gen_unaligned_load_w (tmp_reg,
					  gen_rtx_MEM (SImode, src_base_reg)));
		  emit_insn (
		    gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg),
					   tmp_reg));
		}
	    }
	  else
	    nds32_emit_mem_move_block (start_regno, remain_words,
				       &dst_base_reg, &dstmem,
				       &src_base_reg, &srcmem,
				       false);
	}
    }

  /* Move the final 0-3 bytes with the widest accesses the shared
     alignment permits.  */
  switch (remain_bytes)
    {
    case 3:
    case 2:
      {
	if (align_to_2_bytes)
	  nds32_emit_mem_move (srcmem, dstmem, HImode, 0);
	else
	  {
	    nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
	    nds32_emit_mem_move (srcmem, dstmem, QImode, 1);
	  }

	if (remain_bytes == 3)
	  nds32_emit_mem_move (srcmem, dstmem, QImode, 2);
	break;
      }
    case 1:
      nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
      break;
    case 0:
      break;
    default:
      gcc_unreachable ();
    }

  /* Successfully created patterns, return true.  */
  return true;
}
|
|
530
|
|
531 /* Function to move block memory content by
|
|
532 using load_multiple and store_multiple.
|
|
533 This is auxiliary extern function to help create rtx template.
|
|
534 Check nds32-multiple.md file for the patterns. */
|
|
535 bool
|
|
536 nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
|
|
537 {
|
|
538 if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment))
|
|
539 return true;
|
|
540
|
|
541 if (!optimize_size && optimize > 2)
|
|
542 return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment);
|
|
543
|
|
544 return false;
|
|
545 }
|
|
546
|
|
547 /* ------------------------------------------------------------------------ */
|
|
548
|
|
549 /* Auxiliary function for expand setmem pattern. */
|
|
550
|
|
/* Replicate the low byte of VALUE into all four bytes of the SImode
   register VALUE4WORD and return VALUE4WORD.

   VALUE must be either a QImode rtx or a CONST_INT.  For a constant the
   replicated word is computed at compile time; otherwise it is built at
   runtime — with DSP insb/pkbb16 when available, or a shift/or sequence
   otherwise.  */
static rtx
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK (QImode);
      rtx new_val = gen_int_mode (val | (val << 8)
				  | (val << 16) | (val << 24), SImode);
      /* Just calculate it here if it's a constant value.  */
      emit_move_insn (value4word, new_val);
    }
  else
    {
      if (NDS32_EXT_DSP_P ())
	{
	  /* ! prepare word with the DSP extension
	     insb	$tmp, $tmp, #..		! $tmp <- 0x0000abab
	     pkbb16	$value4word, $tmp, $tmp	! $value4word <- 0xabababab */
	  rtx tmp = gen_reg_rtx (SImode);

	  /* Zero-extend the QImode value into an SImode register.  */
	  convert_move (tmp, value, true);

	  /* NOTE(review): gen_insvsi_internal is given constant 0x8 —
	     presumably the bit position at which the low byte is
	     inserted (yielding 0x0000abab); confirm against the
	     insvsi_internal pattern in nds32.md.  */
	  emit_insn (
	    gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));

	  emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
	}
      else
	{
	  /* ! prepare word
	     andi	$tmp1, $value, 0xff	! $tmp1 <- 0x000000ab
	     slli	$tmp2, $tmp1, 8		! $tmp2 <- 0x0000ab00
	     or		$tmp3, $tmp1, $tmp2	! $tmp3 <- 0x0000abab
	     slli	$tmp4, $tmp3, 16	! $tmp4 <- 0xabab0000
	     or		$val4word, $tmp3, $tmp4	! $value4word <- 0xabababab  */

	  rtx tmp1, tmp2, tmp3, tmp4;
	  tmp1 = expand_binop (SImode, and_optab, value,
			       gen_int_mode (0xff, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp2 = expand_binop (SImode, ashl_optab, tmp1,
			       gen_int_mode (8, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
			       NULL_RTX, 0, OPTAB_WIDEN);
	  tmp4 = expand_binop (SImode, ashl_optab, tmp3,
			       gen_int_mode (16, SImode),
			       NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
	}
    }

  return value4word;
}
|
|
608
|
|
609 static rtx
|
|
610 nds32_gen_dup_4_byte_to_word_value (rtx value)
|
|
611 {
|
|
612 rtx value4word = gen_reg_rtx (SImode);
|
|
613 nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);
|
|
614
|
|
615 return value4word;
|
|
616 }
|
|
617
|
|
/* Return a fresh DImode register whose eight bytes all hold the low
   byte of VALUE: the low 32-bit half is filled by the 4-byte helper,
   then copied into the high half.  */
static rtx
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
{
  rtx value4doubleword = gen_reg_rtx (DImode);

  /* Replicate the byte across the low word...  */
  nds32_gen_dup_4_byte_to_word_value_aux (
    value, nds32_di_low_part_subreg(value4doubleword));

  /* ...then duplicate the low word into the high word.  */
  emit_move_insn (nds32_di_high_part_subreg(value4doubleword),
		  nds32_di_low_part_subreg(value4doubleword));
  return value4doubleword;
}
|
|
630
|
|
631
|
|
632 static rtx
|
|
633 emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
|
|
634 {
|
|
635 rtx word_mode_label = gen_label_rtx ();
|
|
636 rtx word_mode_end_label = gen_label_rtx ();
|
|
637 rtx byte_mode_size = gen_reg_rtx (SImode);
|
|
638 rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
|
|
639 rtx word_mode_end = gen_reg_rtx (SImode);
|
|
640 rtx size_for_word = gen_reg_rtx (SImode);
|
|
641
|
|
642 /* and $size_for_word, $size, #~0x7 */
|
|
643 size_for_word = expand_binop (SImode, and_optab, size,
|
|
644 gen_int_mode (~0x7, SImode),
|
|
645 NULL_RTX, 0, OPTAB_WIDEN);
|
|
646
|
|
647 emit_move_insn (byte_mode_size, size);
|
|
648
|
|
649 /* beqz $size_for_word, .Lbyte_mode_entry */
|
|
650 emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
|
|
651 SImode, 1, word_mode_end_label);
|
|
652 /* add $word_mode_end, $dst, $size_for_word */
|
|
653 word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
|
|
654 NULL_RTX, 0, OPTAB_WIDEN);
|
|
655
|
|
656 /* andi $byte_mode_size, $size, 0x7 */
|
|
657 byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
|
|
658 NULL_RTX, 0, OPTAB_WIDEN);
|
|
659
|
|
660 emit_move_insn (byte_mode_size, byte_mode_size_tmp);
|
|
661
|
|
662 /* .Lword_mode: */
|
|
663 emit_label (word_mode_label);
|
|
664 /* ! word-mode set loop
|
|
665 smw.bim $value4word, [$dst_itr], $value4word, 0
|
|
666 bne $word_mode_end, $dst_itr, .Lword_mode */
|
|
667 emit_insn (gen_unaligned_store_update_base_dw (itr,
|
|
668 itr,
|
|
669 value));
|
|
670 emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
|
|
671 Pmode, 1, word_mode_label);
|
|
672
|
|
673 emit_label (word_mode_end_label);
|
|
674
|
|
675 return byte_mode_size;
|
|
676 }
|
|
677
|
|
678 static rtx
|
|
679 emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
|
|
680 {
|
|
681 rtx end = gen_reg_rtx (Pmode);
|
|
682 rtx byte_mode_label = gen_label_rtx ();
|
|
683 rtx end_label = gen_label_rtx ();
|
|
684
|
|
685 value = force_reg (QImode, value);
|
|
686
|
|
687 if (need_end)
|
|
688 end = expand_binop (Pmode, add_optab, itr, size,
|
|
689 NULL_RTX, 0, OPTAB_WIDEN);
|
|
690 /* beqz $byte_mode_size, .Lend
|
|
691 add $byte_mode_end, $dst_itr, $byte_mode_size */
|
|
692 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
|
|
693 SImode, 1, end_label);
|
|
694
|
|
695 if (!need_end)
|
|
696 end = expand_binop (Pmode, add_optab, itr, size,
|
|
697 NULL_RTX, 0, OPTAB_WIDEN);
|
|
698
|
|
699 /* .Lbyte_mode: */
|
|
700 emit_label (byte_mode_label);
|
|
701
|
|
702 /* ! byte-mode set loop
|
|
703 sbi.bi $value, [$dst_itr] ,1
|
|
704 bne $byte_mode_end, $dst_itr, .Lbyte_mode */
|
|
705 nds32_emit_post_inc_load_store (value, itr, QImode, false);
|
|
706
|
|
707 emit_cmp_and_jump_insns (end, itr, NE, NULL,
|
|
708 Pmode, 1, byte_mode_label);
|
|
709 /* .Lend: */
|
|
710 emit_label (end_label);
|
|
711
|
|
712 if (need_end)
|
|
713 return end;
|
|
714 else
|
|
715 return NULL_RTX;
|
|
716 }
|
|
717
|
|
/* Expand setmem as a runtime loop: fill DSTMEM with SIZE copies of the
   byte VALUE.  Always returns true.

   Emit loop version of setmem.
     memset:
       ! prepare word
       andi	$tmp1, $val, 0xff	! $tmp1 <- 0x000000ab
       slli	$tmp2, $tmp1, 8		! $tmp2 <- 0x0000ab00
       or	$tmp3, $val, $tmp2	! $tmp3 <- 0x0000abab
       slli	$tmp4, $tmp3, 16	! $tmp4 <- 0xabab0000
       or	$val4word, $tmp3, $tmp4	! $value4word <- 0xabababab

       and	$size_for_word, $size, #-4
       beqz	$size_for_word, .Lword_mode_end

       add	$word_mode_end, $dst, $size_for_word
       andi	$byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim	$value4word, [$dst], $value4word, 0
       bne	$word_mode_end, $dst, .Lword_mode

     .Lword_mode_end:
       beqz	$byte_mode_size, .Lend
       add	$byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi	$value4word, [$dst] ,1
       bne	$byte_mode_end, $dst, .Lbyte_mode
     .Lend:  */
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
  rtx value4doubleword;
  rtx value4byte;
  rtx dst;
  rtx byte_mode_size;

  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  /* Build a DImode register with the fill byte replicated into all
     eight byte positions.  */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);

  /* Doubleword store loop; yields the number of leftover bytes.  */
  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);

  /* Low byte of the replicated value, for the trailing byte loop.  */
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  /* Store the remaining 0-7 bytes one at a time; no end address is
     needed afterwards.  */
  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);

  return true;
}
|
|
795
|
|
/* Expand setmem for V3M when the destination is not known to be 4-byte
   aligned (V3M cannot do unaligned word access).  For sizes above 16
   bytes: store single bytes until the destination is 4-byte aligned,
   run the doubleword store loop, then finish the remainder with a byte
   loop.  Small sizes go straight to the byte loop.  Always returns
   true.  */
static bool
nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
{
  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  rtx need_align_bytes = gen_reg_rtx (SImode);
  rtx last_2_bit = gen_reg_rtx (SImode);
  rtx byte_loop_base = gen_reg_rtx (SImode);
  rtx byte_loop_size = gen_reg_rtx (SImode);
  rtx remain_size = gen_reg_rtx (SImode);
  rtx new_base_reg;
  rtx value4byte, value4doubleword;
  rtx byte_mode_size;
  rtx last_byte_loop_label = gen_label_rtx ();

  size = force_reg (SImode, size);

  /* Replicated fill value: DImode for the word loop, its low byte for
     the byte loops.  */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
				    subreg_lowpart_offset (QImode, DImode));

  /* Defaults for the small-size path: byte-fill the whole region.  */
  emit_move_insn (byte_loop_size, size);
  emit_move_insn (byte_loop_base, base_reg);

  /* Jump to last byte loop if size is less than 16.  */
  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
			   SImode, 1, last_byte_loop_label);

  /* Make sure we align to 4 bytes first since v3m can't do unaligned
     access.  */
  emit_insn (gen_andsi3 (last_2_bit,
			 base_reg,
			 gen_int_mode (0x3, SImode)));

  /* need_align_bytes = 4 - (base & 3).  NOTE(review): when the base is
     already aligned this yields 4, so four bytes are byte-filled before
     the word loop — harmless here since size > 16, but confirm this is
     intentional.  */
  emit_insn (gen_subsi3 (need_align_bytes,
			 gen_int_mode (4, SImode),
			 last_2_bit));

  /* Align to 4 bytes with a byte loop; the returned end address is the
     aligned base for the word loop.  */
  new_base_reg = emit_setmem_byte_loop (base_reg,
					need_align_bytes,
					value4byte,
					true);

  /* Calculate the remaining size after alignment.  */
  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));

  /* Set memory doubleword by doubleword.  */
  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
						remain_size,
						value4doubleword);

  /* Hand the word loop's leftovers to the shared trailing byte loop.  */
  emit_move_insn (byte_loop_base, new_base_reg);
  emit_move_insn (byte_loop_size, byte_mode_size);

  emit_label (last_byte_loop_label);

  /* And set memory for the remaining bytes.  */
  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
  return true;
}
|
|
855
|
|
/* Try to expand setmem without a loop: preload a block of registers
   with the replicated fill word and emit unrolled smw (store-multiple)
   insns.  SIZE must be a CONST_INT no larger than a register-pressure
   derived limit; return false otherwise so the caller can fall back.
   ALIGN, EXPECTED_ALIGN and EXPECTED_SIZE are unused.  */
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
			    rtx align ATTRIBUTE_UNUSED,
			    rtx expected_align ATTRIBUTE_UNUSED,
			    rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  /* Reduced register set: fewer scratch registers, smaller limits.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs = 4;
      maximum_bytes = 64;
      start_regno = 2;
    }
  else
    {
      maximum_regs = 8;
      maximum_bytes = 128;
      start_regno = 16;
    }

  real_size = UINTVAL (size) & GET_MODE_MASK(SImode);

  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  /* Number of registers to preload with the fill word, capped at
     maximum_regs below.  */
  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  /* Bytes written by one full-width smw.  */
  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
	rtx reg0 = gen_rtx_REG (SImode, regno);
	rtx reg1 = gen_rtx_REG (SImode, regno+1);
	unsigned last_regno = start_regno + prepare_regs - 1;

	emit_move_insn (reg0, value4word);
	emit_move_insn (reg1, value4word);
	rtx regd = gen_rtx_REG (DImode, regno);
	regno += 2;

	/* Try to utilize movd44: replicate the first filled pair into
	   the remaining registers two at a time.  */
	while (regno <= last_regno)
	  {
	    if ((regno + 1) <= last_regno)
	      {
		rtx reg = gen_rtx_REG (DImode, regno);
		emit_move_insn (reg, regd);
		regno += 2;
	      }
	    else
	      {
		/* Odd register left over: fill it as a single word.  */
		rtx reg = gen_rtx_REG (SImode, regno);
		emit_move_insn (reg, reg0);
		regno += 1;
	      }
	  }
	break;
      }
    case 1:
      {
	rtx reg = gen_rtx_REG (SImode, regno++);
	emit_move_insn (reg, value4word);
      }
      break;
    case 0:
      break;
    }

  /* Full-width smw insns, each post-updating the destination base.  */
  if (fill_per_smw)
    for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw)
      {
	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
						dst_base_reg, dstmem,
						true, &new_base_reg));
	dst_base_reg = new_base_reg;
	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  /* One narrower smw for the remaining whole words.  */
  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
					      dst_base_reg, dstmem,
					      true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  /* Trailing 0-3 bytes, stored one at a time.  */
  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
				   subreg_lowpart_offset(QImode, SImode));
      int offset = 0;
      for (;remain_bytes;--remain_bytes, ++offset)
	{
	  nds32_emit_load_store (value, dstmem, QImode, offset, false);
	}
    }

  return true;
}
|
|
980
|
|
981 bool
|
|
982 nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
|
|
983 rtx expected_align,
|
|
984 rtx expected_size)
|
|
985 {
|
|
986 bool align_to_4_bytes = (INTVAL (align) & 3) == 0;
|
|
987
|
|
988 /* Only expand at O3 */
|
|
989 if (optimize_size || optimize < 3)
|
|
990 return false;
|
|
991
|
|
992 if (TARGET_ISA_V3M && !align_to_4_bytes)
|
|
993 return nds32_expand_setmem_loop_v3m (dstmem, size, value);
|
|
994
|
|
995 if (nds32_expand_setmem_unroll (dstmem, size, value,
|
|
996 align, expected_align, expected_size))
|
|
997 return true;
|
|
998
|
|
999 return nds32_expand_setmem_loop (dstmem, size, value);
|
|
1000 }
|
|
1001
|
|
1002 /* ------------------------------------------------------------------------ */
|
|
1003
|
|
1004 /* Auxiliary function for expand strlen pattern. */
|
|
1005
|
|
1006 bool
|
|
1007 nds32_expand_strlen (rtx result, rtx str,
|
|
1008 rtx target_char, rtx align ATTRIBUTE_UNUSED)
|
|
1009 {
|
|
1010 rtx base_reg, backup_base_reg;
|
|
1011 rtx ffb_result;
|
|
1012 rtx target_char_ptr, length;
|
|
1013 rtx loop_label, tmp;
|
|
1014
|
|
1015 if (optimize_size || optimize < 3)
|
|
1016 return false;
|
|
1017
|
|
1018 gcc_assert (MEM_P (str));
|
|
1019 gcc_assert (CONST_INT_P (target_char) || REG_P (target_char));
|
|
1020
|
|
1021 base_reg = copy_to_mode_reg (SImode, XEXP (str, 0));
|
|
1022 loop_label = gen_label_rtx ();
|
|
1023
|
|
1024 ffb_result = gen_reg_rtx (Pmode);
|
|
1025 tmp = gen_reg_rtx (SImode);
|
|
1026 backup_base_reg = gen_reg_rtx (SImode);
|
|
1027
|
|
1028 /* Emit loop version of strlen.
|
|
1029 move $backup_base, $base
|
|
1030 .Lloop:
|
|
1031 lmw.bim $tmp, [$base], $tmp, 0
|
|
1032 ffb $ffb_result, $tmp, $target_char ! is there $target_char?
|
|
1033 beqz $ffb_result, .Lloop
|
|
1034 add $last_char_ptr, $base, $ffb_result
|
|
1035 sub $length, $last_char_ptr, $backup_base */
|
|
1036
|
|
1037 /* move $backup_base, $base */
|
|
1038 emit_move_insn (backup_base_reg, base_reg);
|
|
1039
|
|
1040 /* .Lloop: */
|
|
1041 emit_label (loop_label);
|
|
1042 /* lmw.bim $tmp, [$base], $tmp, 0 */
|
|
1043 emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg));
|
|
1044
|
|
1045 /* ffb $ffb_result, $tmp, $target_char ! is there $target_char? */
|
|
1046 emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char));
|
|
1047
|
|
1048 /* beqz $ffb_result, .Lloop */
|
|
1049 emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL,
|
|
1050 SImode, 1, loop_label);
|
|
1051
|
|
1052 /* add $target_char_ptr, $base, $ffb_result */
|
|
1053 target_char_ptr = expand_binop (Pmode, add_optab, base_reg,
|
|
1054 ffb_result, NULL_RTX, 0, OPTAB_WIDEN);
|
|
1055
|
|
1056 /* sub $length, $target_char_ptr, $backup_base */
|
|
1057 length = expand_binop (Pmode, sub_optab, target_char_ptr,
|
|
1058 backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN);
|
|
1059
|
|
1060 emit_move_insn (result, length);
|
|
1061
|
|
1062 return true;
|
|
1063 }
|
111
|
1064
|
|
1065 /* ------------------------------------------------------------------------ */
|
|
1066
|
|
1067 /* Functions to expand load_multiple and store_multiple.
|
|
1068 They are auxiliary extern functions to help create rtx template.
|
|
1069 Check nds32-multiple.md file for the patterns. */
|
|
1070 rtx
|
|
1071 nds32_expand_load_multiple (int base_regno, int count,
|
131
|
1072 rtx base_addr, rtx basemem,
|
|
1073 bool update_base_reg_p,
|
|
1074 rtx *update_base_reg)
|
111
|
1075 {
|
|
1076 int par_index;
|
|
1077 int offset;
|
131
|
1078 int start_idx;
|
111
|
1079 rtx result;
|
|
1080 rtx new_addr, mem, reg;
|
|
1081
|
131
|
1082 /* Generate a unaligned load to prevent load instruction pull out from
|
|
1083 parallel, and then it will generate lwi, and lose unaligned acces */
|
|
1084 if (count == 1)
|
|
1085 {
|
|
1086 reg = gen_rtx_REG (SImode, base_regno);
|
|
1087 if (update_base_reg_p)
|
|
1088 {
|
|
1089 *update_base_reg = gen_reg_rtx (SImode);
|
|
1090 return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr);
|
|
1091 }
|
|
1092 else
|
|
1093 return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr));
|
|
1094 }
|
111
|
1095
|
131
|
1096 /* Create the pattern that is presented in nds32-multiple.md. */
|
|
1097 if (update_base_reg_p)
|
|
1098 {
|
|
1099 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
|
|
1100 start_idx = 1;
|
|
1101 }
|
|
1102 else
|
|
1103 {
|
|
1104 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
|
|
1105 start_idx = 0;
|
|
1106 }
|
|
1107
|
|
1108 if (update_base_reg_p)
|
|
1109 {
|
|
1110 offset = count * 4;
|
|
1111 new_addr = plus_constant (Pmode, base_addr, offset);
|
|
1112 *update_base_reg = gen_reg_rtx (SImode);
|
|
1113
|
|
1114 XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
|
|
1115 }
|
111
|
1116
|
|
1117 for (par_index = 0; par_index < count; par_index++)
|
|
1118 {
|
|
1119 offset = par_index * 4;
|
|
1120 /* 4-byte for loading data to each register. */
|
|
1121 new_addr = plus_constant (Pmode, base_addr, offset);
|
|
1122 mem = adjust_automodify_address_nv (basemem, SImode,
|
|
1123 new_addr, offset);
|
|
1124 reg = gen_rtx_REG (SImode, base_regno + par_index);
|
|
1125
|
131
|
1126 XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem);
|
111
|
1127 }
|
|
1128
|
|
1129 return result;
|
|
1130 }
|
|
1131
|
|
1132 rtx
|
|
1133 nds32_expand_store_multiple (int base_regno, int count,
|
131
|
1134 rtx base_addr, rtx basemem,
|
|
1135 bool update_base_reg_p,
|
|
1136 rtx *update_base_reg)
|
111
|
1137 {
|
|
1138 int par_index;
|
|
1139 int offset;
|
131
|
1140 int start_idx;
|
111
|
1141 rtx result;
|
|
1142 rtx new_addr, mem, reg;
|
|
1143
|
131
|
1144 if (count == 1)
|
|
1145 {
|
|
1146 reg = gen_rtx_REG (SImode, base_regno);
|
|
1147 if (update_base_reg_p)
|
|
1148 {
|
|
1149 *update_base_reg = gen_reg_rtx (SImode);
|
|
1150 return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg);
|
|
1151 }
|
|
1152 else
|
|
1153 return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg);
|
|
1154 }
|
|
1155
|
111
|
1156 /* Create the pattern that is presented in nds32-multiple.md. */
|
|
1157
|
131
|
1158 if (update_base_reg_p)
|
|
1159 {
|
|
1160 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
|
|
1161 start_idx = 1;
|
|
1162 }
|
|
1163 else
|
|
1164 {
|
|
1165 result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
|
|
1166 start_idx = 0;
|
|
1167 }
|
|
1168
|
|
1169 if (update_base_reg_p)
|
|
1170 {
|
|
1171 offset = count * 4;
|
|
1172 new_addr = plus_constant (Pmode, base_addr, offset);
|
|
1173 *update_base_reg = gen_reg_rtx (SImode);
|
|
1174
|
|
1175 XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
|
|
1176 }
|
111
|
1177
|
|
1178 for (par_index = 0; par_index < count; par_index++)
|
|
1179 {
|
|
1180 offset = par_index * 4;
|
|
1181 /* 4-byte for storing data to memory. */
|
|
1182 new_addr = plus_constant (Pmode, base_addr, offset);
|
|
1183 mem = adjust_automodify_address_nv (basemem, SImode,
|
|
1184 new_addr, offset);
|
|
1185 reg = gen_rtx_REG (SImode, base_regno + par_index);
|
|
1186
|
131
|
1187 XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg);
|
111
|
1188 }
|
|
1189
|
|
1190 return result;
|
|
1191 }
|
|
1192
|
|
1193 /* ------------------------------------------------------------------------ */
|