Super User's BSD Cross Reference: /OpenBSD/sys/lib/libkern/arch/arm/memcpy.S

1 /* $OpenBSD: memcpy.S,v 1.6 2015/06/08 14:22:05 jsg Exp $ */
2 /* $NetBSD: memcpy.S,v 1.2 2001/11/20 00:29:20 chris Exp $ */
3
4 /*-
5 * Copyright (c) 1997 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Neil A. Carson and Mark Brinicombe
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <machine/asm.h>
34
35 /*
36 * This is one fun bit of code ...
37 * Some easy listening music is suggested while trying to understand this
38 * code e.g. Iron Maiden
39 *
40 * For anyone attempting to understand it :
41 *
42 * The core code is implemented here with simple stubs for memcpy()
43 * memmove() and bcopy().
44 *
45 * All local labels are prefixed with Lmemcpy_
46 * Following the prefix a label starting f is used in the forward copy code
47 * while a label using b is used in the backwards copy code
48 * The source and destination addresses determine whether a forward or
49 * backward copy is performed.
50 * Separate bits of code are used to deal with the following situations
51 * for both the forward and backwards copy.
52 * unaligned source address
53 * unaligned destination address
54 * Separate copy routines are used to produce an optimised result for each
55 * of these cases.
56 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
57 * a time where possible.
58 *
59 * Note: r12 (aka ip) can be trashed during the function along with
60 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
61 * Additional registers are preserved prior to use i.e. r4, r5 & lr
62 *
63 * Apologies for the state of the comments ;-)
64 */
65
66.syntax unified
67
/*
 * memcpy() / memmove() -- one body serves both entry points.
 *
 * On entry: r0 = destination, r1 = source, r2 = byte count.
 * On exit:  r0 = the destination pointer as passed in (it is pushed here
 *           and popped again by every `ldmia sp!, {r0, pc}` exit).
 * Scratch:  r3, r12 and lr (lr is free because it is saved on the stack).
 *
 * Overlapping buffers are handled by picking the copy direction from the
 * relative order of the two pointers.
 *
 * NOTE(review): ENTRY()/ENTRY_NP() come from <machine/asm.h>; presumably
 * they emit the global labels for the two names -- confirm there.
 */
68 ENTRY(memcpy)
69 ENTRY_NP(memmove)
70 /* Determine copy direction: compare source (r1) against destination (r0) */
71 cmp r1, r0
72
73 moveq pc, lr /* src == dst: nothing to copy, r0 already holds dst */
74
75 /*
 * Save dst and the return address once, so the copy code can use lr as
 * a scratch register and return with a single pop of {r0, pc}.
 */
76 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
77
78 bcc Lmemcpy_backwards /* src < dst (unsigned, flags still from the cmp): copy from the end */
79
80 /* start of forwards copy; from here on r2 = (bytes left) - 4 */
81 subs r2, r2, #4
82 blt Lmemcpy_fl4 /* less than 4 bytes */
83 ands r12, r0, #3 /* r12 = dst & 3, consumed by Lmemcpy_fdestul */
84 bne Lmemcpy_fdestul /* oh unaligned destination addr */
85 ands r12, r1, #3 /* r12 = src & 3, consumed by Lmemcpy_fsrcul */
86 bne Lmemcpy_fsrcul /* oh unaligned source addr */
/* both pointers word-aligned: fall through into Lmemcpy_ft8 */
87
/*
 * Forward copy with source and destination both word-aligned.
 * Entered with r2 = (bytes left) - 4.  Each stage re-biases r2 so that a
 * single signed test says whether another full chunk is still available.
 * lr is used as a data register here; the real return address is on the
 * stack.
 */
88 Lmemcpy_ft8:
89 /* We have aligned source and destination */
90 subs r2, r2, #8 /* r2 = left - 12 */
91 blt Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
92 subs r2, r2, #0x14 /* r2 = left - 32 */
93 blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
94 stmdb sp!, {r4} /* borrow r4 */
95
96 /* blat 32 bytes at a time (two 16-byte ldm/stm pairs per pass) */
97 /* XXX for really big copies perhaps we should use more registers */
98 Lmemcpy_floop32:
99 ldmia r1!, {r3, r4, r12, lr}
100 stmia r0!, {r3, r4, r12, lr}
101 ldmia r1!, {r3, r4, r12, lr}
102 stmia r0!, {r3, r4, r12, lr}
103 subs r2, r2, #0x20
104 bge Lmemcpy_floop32
105
106 cmn r2, #0x10 /* flags on r2 + 16: ge when 16 or more bytes are left */
107 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
108 stmiage r0!, {r3, r4, r12, lr}
109 subge r2, r2, #0x10 /* keeps r2 = left - 32 */
110 ldmia sp!, {r4} /* return r4 */
111
112 Lmemcpy_fl32:
113 adds r2, r2, #0x14 /* r2 = left - 12; these flags gate the loop below */
114
115 /* blat 12 bytes at a time; all four instructions are skipped once flags are lt */
116 Lmemcpy_floop12:
117 ldmiage r1!, {r3, r12, lr}
118 stmiage r0!, {r3, r12, lr}
119 subsge r2, r2, #0x0c
120 bge Lmemcpy_floop12
121
122 Lmemcpy_fl12:
123 adds r2, r2, #8 /* r2 = left - 4 */
124 blt Lmemcpy_fl4
125
/* 4..11 bytes left: copy one word (lt) or two words (ge) */
126 subs r2, r2, #4
127 ldrlt r3, [r1], #4
128 strlt r3, [r0], #4
129 ldmiage r1!, {r3, r12}
130 stmiage r0!, {r3, r12}
131 subge r2, r2, #4 /* either way r2 is (bytes left) - 4 again for Lmemcpy_fl4 */
132
/*
 * Forward tail: 0..3 bytes left.
 * Entered with r2 = (bytes left) - 4, r1/r0 = byte-exact source and
 * destination pointers, and {r0, lr} still on the stack from entry.
 * Returns the original destination in r0.
 *
 * The early return used to be wrapped in `#ifdef __APCS_26_` (one trailing
 * underscore) selecting an `ldm ...^` form.  That conditional guarded only
 * this one of the four return sites in the file, so it has been dropped;
 * the instruction kept below is the former #else arm.
 */
133 Lmemcpy_fl4:
134 /* less than 4 bytes to go */
135 adds r2, r2, #4 /* r2 = bytes left (0..3) */
139 ldmiaeq sp!, {r0, pc} /* done */
141 /* copy the crud byte at a time */
142 cmp r2, #2
143 ldrb r3, [r1], #1 /* 1st byte: always (r2 is non-zero here) */
144 strb r3, [r0], #1
145 ldrbge r3, [r1], #1 /* 2nd byte when r2 >= 2 */
146 strbge r3, [r0], #1
147 ldrbgt r3, [r1], #1 /* 3rd byte when r2 == 3 */
148 strbgt r3, [r0], #1
149 ldmia sp!, {r0, pc} /* pop original dst and return */
150
151 /*
 * erg - unaligned destination
 * Reached from the forward entry code with r12 = dst & 3 (non-zero) and
 * r2 = (bytes left) - 4 >= 0.  Copies 1..3 single bytes so that r0 becomes
 * word-aligned, then re-checks the source alignment.
 */
152 Lmemcpy_fdestul:
153 rsb r12, r12, #4 /* r12 = 4 - (dst & 3) = bytes needed to align dst */
154 cmp r12, #2
155
156 /* align destination with byte copies */
157 ldrb r3, [r1], #1 /* always */
158 strb r3, [r0], #1
159 ldrbge r3, [r1], #1 /* when r12 >= 2 */
160 strbge r3, [r0], #1
161 ldrbgt r3, [r1], #1 /* when r12 == 3 */
162 strbgt r3, [r0], #1
163 subs r2, r2, r12 /* account for the bytes just copied */
164 blt Lmemcpy_fl4 /* less than 4 bytes left */
165
166 ands r12, r1, #3 /* r12 = src & 3, consumed by Lmemcpy_fsrcul */
167 beq Lmemcpy_ft8 /* we have an aligned source */
/* source still unaligned: fall through into Lmemcpy_fsrcul */
168
169 /*
 * erg - unaligned source
 * Forward copy with an aligned destination but r12 = src & 3 != 0.
 * Entered with r2 = (bytes left) - 4 >= 0.  The source is read as whole
 * aligned words; each destination word is spliced together from the
 * previously loaded word (carried in lr) and the next one.
 * Because of the whole-word loads, bytes just outside the source range
 * may be read, but only inside aligned words that also hold source bytes.
 * NOTE(review): the shift directions put lower-addressed bytes in the low
 * bits, i.e. little-endian byte order; no big-endian variant is visible
 * in this file.
 */
170 /* This is where it gets nasty ... */
171 Lmemcpy_fsrcul:
172 bic r1, r1, #3 /* round src down to a word boundary */
173 ldr lr, [r1], #4 /* lr = word holding the first source bytes */
174 cmp r12, #2
175 bgt Lmemcpy_fsrcul3 /* src & 3 == 3 */
176 beq Lmemcpy_fsrcul2 /* src & 3 == 2 */
/* src & 3 == 1: three bytes come from the carried word, one from the next */
177 cmp r2, #0x0c
178 blt Lmemcpy_fsrcul1loop4 /* fewer than 16 bytes left: word-at-a-time only */
179 sub r2, r2, #0x0c /* r2 = left - 16 */
180 stmdb sp!, {r4, r5}
181
/* 16 bytes per pass: four new source words in, four spliced words out */
182 Lmemcpy_fsrcul1loop16:
183 mov r3, lr, lsr #8
184 ldmia r1!, {r4, r5, r12, lr}
185 orr r3, r3, r4, lsl #24
186 mov r4, r4, lsr #8
187 orr r4, r4, r5, lsl #24
188 mov r5, r5, lsr #8
189 orr r5, r5, r12, lsl #24
190 mov r12, r12, lsr #8
191 orr r12, r12, lr, lsl #24
192 stmia r0!, {r3-r5, r12}
193 subs r2, r2, #0x10
194 bge Lmemcpy_fsrcul1loop16
195 ldmia sp!, {r4, r5}
196 adds r2, r2, #0x0c /* back to r2 = left - 4 */
197 blt Lmemcpy_fsrcul1l4
198
/* 4 bytes per pass */
199 Lmemcpy_fsrcul1loop4:
200 mov r12, lr, lsr #8
201 ldr lr, [r1], #4
202 orr r12, r12, lr, lsl #24
203 str r12, [r0], #4
204 subs r2, r2, #4
205 bge Lmemcpy_fsrcul1loop4
206
207 Lmemcpy_fsrcul1l4:
208 sub r1, r1, #3 /* three bytes of the last loaded word are still unused: make r1 byte-exact again */
209 b Lmemcpy_fl4
210
/*
 * Forward copy, aligned destination, src & 3 == 2.
 * Entered from Lmemcpy_fsrcul with lr = the already-loaded source word,
 * r1 pointing at the next aligned source word and r2 = (bytes left) - 4.
 * Each destination word takes the upper half of the carried word and the
 * lower half of the next one.
 */
211 Lmemcpy_fsrcul2:
212 cmp r2, #0x0c
213 blt Lmemcpy_fsrcul2loop4 /* fewer than 16 bytes left */
214 sub r2, r2, #0x0c /* r2 = left - 16 */
215 stmdb sp!, {r4, r5}
216
/* 16 bytes per pass */
217 Lmemcpy_fsrcul2loop16:
218 mov r3, lr, lsr #16
219 ldmia r1!, {r4, r5, r12, lr}
220 orr r3, r3, r4, lsl #16
221 mov r4, r4, lsr #16
222 orr r4, r4, r5, lsl #16
223 mov r5, r5, lsr #16
224 orr r5, r5, r12, lsl #16
225 mov r12, r12, lsr #16
226 orr r12, r12, lr, lsl #16
227 stmia r0!, {r3-r5, r12}
228 subs r2, r2, #0x10
229 bge Lmemcpy_fsrcul2loop16
230 ldmia sp!, {r4, r5}
231 adds r2, r2, #0x0c /* back to r2 = left - 4 */
232 blt Lmemcpy_fsrcul2l4
233
/* 4 bytes per pass */
234 Lmemcpy_fsrcul2loop4:
235 mov r12, lr, lsr #16
236 ldr lr, [r1], #4
237 orr r12, r12, lr, lsl #16
238 str r12, [r0], #4
239 subs r2, r2, #4
240 bge Lmemcpy_fsrcul2loop4
241
242 Lmemcpy_fsrcul2l4:
243 sub r1, r1, #2 /* two bytes of the last loaded word are still unused */
244 b Lmemcpy_fl4
245
/*
 * Forward copy, aligned destination, src & 3 == 3.
 * Entered from Lmemcpy_fsrcul with lr = the already-loaded source word,
 * r1 pointing at the next aligned source word and r2 = (bytes left) - 4.
 * Each destination word takes the top byte of the carried word and the
 * low three bytes of the next one.
 */
246 Lmemcpy_fsrcul3:
247 cmp r2, #0x0c
248 blt Lmemcpy_fsrcul3loop4 /* fewer than 16 bytes left */
249 sub r2, r2, #0x0c /* r2 = left - 16 */
250 stmdb sp!, {r4, r5}
251
/* 16 bytes per pass */
252 Lmemcpy_fsrcul3loop16:
253 mov r3, lr, lsr #24
254 ldmia r1!, {r4, r5, r12, lr}
255 orr r3, r3, r4, lsl #8
256 mov r4, r4, lsr #24
257 orr r4, r4, r5, lsl #8
258 mov r5, r5, lsr #24
259 orr r5, r5, r12, lsl #8
260 mov r12, r12, lsr #24
261 orr r12, r12, lr, lsl #8
262 stmia r0!, {r3-r5, r12}
263 subs r2, r2, #0x10
264 bge Lmemcpy_fsrcul3loop16
265 ldmia sp!, {r4, r5}
266 adds r2, r2, #0x0c /* back to r2 = left - 4 */
267 blt Lmemcpy_fsrcul3l4
268
/* 4 bytes per pass */
269 Lmemcpy_fsrcul3loop4:
270 mov r12, lr, lsr #24
271 ldr lr, [r1], #4
272 orr r12, r12, lr, lsl #8
273 str r12, [r0], #4
274 subs r2, r2, #4
275 bge Lmemcpy_fsrcul3loop4
276
277 Lmemcpy_fsrcul3l4:
278 sub r1, r1, #1 /* one byte of the last loaded word is still unused */
279 b Lmemcpy_fl4
280
/*
 * Backward copy, taken when src < dst.  Both pointers are moved to one
 * past the end of their buffers and all later accesses pre-decrement, so
 * the bytes are copied from the highest address down.
 * As in the forward path, r2 is kept as (bytes left) - 4 from here on.
 */
281 Lmemcpy_backwards:
282 add r1, r1, r2 /* r1 = src + len */
283 add r0, r0, r2 /* r0 = dst + len */
284 subs r2, r2, #4
285 blt Lmemcpy_bl4 /* less than 4 bytes */
286 ands r12, r0, #3 /* r12 = (dst end) & 3, consumed by Lmemcpy_bdestul */
287 bne Lmemcpy_bdestul /* oh unaligned destination addr */
288 ands r12, r1, #3 /* r12 = (src end) & 3, consumed by Lmemcpy_bsrcul */
289 bne Lmemcpy_bsrcul /* oh unaligned source addr */
/* both end pointers word-aligned: fall through into Lmemcpy_bt8 */
290
/*
 * Backward copy with both end pointers word-aligned.
 * Entered with r2 = (bytes left) - 4; r1/r0 point just past the bytes
 * still to be copied.  Unlike the forward path, r4 is pushed before the
 * 32-byte test because the 16-byte step at Lmemcpy_bl32 also uses it.
 */
291 Lmemcpy_bt8:
292 /* We have aligned source and destination */
293 subs r2, r2, #8 /* r2 = left - 12 */
294 blt Lmemcpy_bl12 /* less than 12 bytes (4 from above) */
295 stmdb sp!, {r4} /* borrow r4; restored at the end of Lmemcpy_bl32 */
296 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
297 blt Lmemcpy_bl32
298
299 /* blat 32 bytes at a time */
300 /* XXX for really big copies perhaps we should use more registers */
301 Lmemcpy_bloop32:
302 ldmdb r1!, {r3, r4, r12, lr}
303 stmdb r0!, {r3, r4, r12, lr}
304 ldmdb r1!, {r3, r4, r12, lr}
305 stmdb r0!, {r3, r4, r12, lr}
306 subs r2, r2, #0x20
307 bge Lmemcpy_bloop32
308
309 Lmemcpy_bl32:
310 cmn r2, #0x10 /* flags on r2 + 16: ge when 16 or more bytes are left */
311 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
312 stmdbge r0!, {r3, r4, r12, lr}
313 subge r2, r2, #0x10
314 adds r2, r2, #0x14 /* r2 = left - 12 */
315 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
316 stmdbge r0!, {r3, r12, lr}
317 subge r2, r2, #0x0c
318 ldmia sp!, {r4} /* return r4 */
319
320 Lmemcpy_bl12:
321 adds r2, r2, #8 /* r2 = left - 4 */
322 blt Lmemcpy_bl4
/* 4..11 bytes left: copy one word (lt) or two words (ge) */
323 subs r2, r2, #4
324 ldrlt r3, [r1, #-4]!
325 strlt r3, [r0, #-4]!
326 ldmdbge r1!, {r3, r12}
327 stmdbge r0!, {r3, r12}
328 subge r2, r2, #4 /* either way r2 is (bytes left) - 4 again for Lmemcpy_bl4 */
329
/*
 * Backward tail: 0..3 bytes left.
 * Entered with r2 = (bytes left) - 4 and r1/r0 pointing just past the
 * remaining bytes; {r0, lr} from function entry are still on the stack.
 * Returns the original destination in r0.
 */
330 Lmemcpy_bl4:
331 /* less than 4 bytes to go */
332 adds r2, r2, #4 /* r2 = bytes left (0..3) */
333 ldmiaeq sp!, {r0, pc} /* nothing left: pop original dst and return */
334
335 /* copy the crud byte at a time */
336 cmp r2, #2
337 ldrb r3, [r1, #-1]! /* 1st byte: always (r2 is non-zero here) */
338 strb r3, [r0, #-1]!
339 ldrbge r3, [r1, #-1]! /* 2nd byte when r2 >= 2 */
340 strbge r3, [r0, #-1]!
341 ldrbgt r3, [r1, #-1]! /* 3rd byte when r2 == 3 */
342 strbgt r3, [r0, #-1]!
343 ldmia sp!, {r0, pc} /* pop original dst and return */
344
345 /*
 * erg - unaligned destination
 * Reached from Lmemcpy_backwards with r12 = (dst end) & 3 (non-zero) and
 * r2 = (bytes left) - 4 >= 0.  Going downwards, exactly r12 bytes lie
 * above the word boundary, so r12 is used directly as the byte count.
 */
346 Lmemcpy_bdestul:
347 cmp r12, #2
348
349 /* align destination with byte copies */
350 ldrb r3, [r1, #-1]! /* always */
351 strb r3, [r0, #-1]!
352 ldrbge r3, [r1, #-1]! /* when r12 >= 2 */
353 strbge r3, [r0, #-1]!
354 ldrbgt r3, [r1, #-1]! /* when r12 == 3 */
355 strbgt r3, [r0, #-1]!
356 subs r2, r2, r12 /* account for the bytes just copied */
357 blt Lmemcpy_bl4 /* less than 4 bytes to go */
358 ands r12, r1, #3 /* r12 = (src end) & 3, consumed by Lmemcpy_bsrcul */
359 beq Lmemcpy_bt8 /* we have an aligned source */
/* source still unaligned: fall through into Lmemcpy_bsrcul */
360
361 /*
 * erg - unaligned source
 * Backward copy with an aligned destination but r12 = (src end) & 3 != 0.
 * Entered with r2 = (bytes left) - 4 >= 0.  The source is read as whole
 * aligned words; each destination word is spliced together from the
 * previously loaded word (carried in r3) and the next lower one.
 * Because of the whole-word loads, bytes just outside the source range
 * may be read, but only inside aligned words that also hold source bytes.
 * NOTE(review): the shift directions put lower-addressed bytes in the low
 * bits, i.e. little-endian byte order; no big-endian variant is visible
 * in this file.
 */
362 /* This is where it gets nasty ... */
363 Lmemcpy_bsrcul:
364 bic r1, r1, #3 /* round the src end pointer down to a word boundary */
365 ldr r3, [r1, #0] /* r3 = word holding the last source bytes */
366 cmp r12, #2
367 blt Lmemcpy_bsrcul1 /* (src end) & 3 == 1 */
368 beq Lmemcpy_bsrcul2 /* (src end) & 3 == 2 */
/* (src end) & 3 == 3: three bytes come from the carried word, one from the next lower */
369 cmp r2, #0x0c
370 blt Lmemcpy_bsrcul3loop4 /* fewer than 16 bytes left: word-at-a-time only */
371 sub r2, r2, #0x0c /* r2 = left - 16 */
372 stmdb sp!, {r4, r5}
373
/* 16 bytes per pass; the lowest word loaded (r3) is carried into the next pass */
374 Lmemcpy_bsrcul3loop16:
375 mov lr, r3, lsl #8
376 ldmdb r1!, {r3-r5, r12}
377 orr lr, lr, r12, lsr #24
378 mov r12, r12, lsl #8
379 orr r12, r12, r5, lsr #24
380 mov r5, r5, lsl #8
381 orr r5, r5, r4, lsr #24
382 mov r4, r4, lsl #8
383 orr r4, r4, r3, lsr #24
384 stmdb r0!, {r4, r5, r12, lr}
385 subs r2, r2, #0x10
386 bge Lmemcpy_bsrcul3loop16
387 ldmia sp!, {r4, r5}
388 adds r2, r2, #0x0c /* back to r2 = left - 4 */
389 blt Lmemcpy_bsrcul3l4
390
/* 4 bytes per pass */
391 Lmemcpy_bsrcul3loop4:
392 mov r12, r3, lsl #8
393 ldr r3, [r1, #-4]!
394 orr r12, r12, r3, lsr #24
395 str r12, [r0, #-4]!
396 subs r2, r2, #4
397 bge Lmemcpy_bsrcul3loop4
398
399 Lmemcpy_bsrcul3l4:
400 add r1, r1, #3 /* three bytes of the last loaded word are still unused: make r1 byte-exact again */
401 b Lmemcpy_bl4
402
/*
 * Backward copy, aligned destination, (src end) & 3 == 2.
 * Entered from Lmemcpy_bsrcul with r3 = the already-loaded source word,
 * r1 = its (aligned) address and r2 = (bytes left) - 4.
 * Each destination word takes the lower half of the carried word and the
 * upper half of the next lower one.
 */
403 Lmemcpy_bsrcul2:
404 cmp r2, #0x0c
405 blt Lmemcpy_bsrcul2loop4 /* fewer than 16 bytes left */
406 sub r2, r2, #0x0c /* r2 = left - 16 */
407 stmdb sp!, {r4, r5}
408
/* 16 bytes per pass */
409 Lmemcpy_bsrcul2loop16:
410 mov lr, r3, lsl #16
411 ldmdb r1!, {r3-r5, r12}
412 orr lr, lr, r12, lsr #16
413 mov r12, r12, lsl #16
414 orr r12, r12, r5, lsr #16
415 mov r5, r5, lsl #16
416 orr r5, r5, r4, lsr #16
417 mov r4, r4, lsl #16
418 orr r4, r4, r3, lsr #16
419 stmdb r0!, {r4, r5, r12, lr}
420 subs r2, r2, #0x10
421 bge Lmemcpy_bsrcul2loop16
422 ldmia sp!, {r4, r5}
423 adds r2, r2, #0x0c /* back to r2 = left - 4 */
424 blt Lmemcpy_bsrcul2l4
425
/* 4 bytes per pass */
426 Lmemcpy_bsrcul2loop4:
427 mov r12, r3, lsl #16
428 ldr r3, [r1, #-4]!
429 orr r12, r12, r3, lsr #16
430 str r12, [r0, #-4]!
431 subs r2, r2, #4
432 bge Lmemcpy_bsrcul2loop4
433
434 Lmemcpy_bsrcul2l4:
435 add r1, r1, #2 /* two bytes of the last loaded word are still unused */
436 b Lmemcpy_bl4
437
/*
 * Backward copy, aligned destination, (src end) & 3 == 1.
 * Entered from Lmemcpy_bsrcul with r3 = the already-loaded source word,
 * r1 = its (aligned) address and r2 = (bytes left) - 4.
 * Each destination word takes the low byte of the carried word and the
 * top three bytes of the next lower one.
 *
 * The wide loop moves 16 bytes per pass, so its label is spelled
 * ...loop16 like the matching loops in the other unaligned-source paths.
 */
438 Lmemcpy_bsrcul1:
439 cmp r2, #0x0c
440 blt Lmemcpy_bsrcul1loop4 /* fewer than 16 bytes left */
441 sub r2, r2, #0x0c /* r2 = left - 16 */
442 stmdb sp!, {r4, r5}
443
/* 16 bytes per pass; the lowest word loaded (r3) is carried into the next pass */
444 Lmemcpy_bsrcul1loop16:
445 mov lr, r3, lsl #24
446 ldmdb r1!, {r3-r5, r12}
447 orr lr, lr, r12, lsr #8
448 mov r12, r12, lsl #24
449 orr r12, r12, r5, lsr #8
450 mov r5, r5, lsl #24
451 orr r5, r5, r4, lsr #8
452 mov r4, r4, lsl #24
453 orr r4, r4, r3, lsr #8
454 stmdb r0!, {r4, r5, r12, lr}
455 subs r2, r2, #0x10
456 bge Lmemcpy_bsrcul1loop16
457 ldmia sp!, {r4, r5}
458 adds r2, r2, #0x0c /* back to r2 = left - 4 */
459 blt Lmemcpy_bsrcul1l4
460
/* 4 bytes per pass */
461 Lmemcpy_bsrcul1loop4:
462 mov r12, r3, lsl #24
463 ldr r3, [r1, #-4]!
464 orr r12, r12, r3, lsr #8
465 str r12, [r0, #-4]!
466 subs r2, r2, #4
467 bge Lmemcpy_bsrcul1loop4
468
469 Lmemcpy_bsrcul1l4:
470 add r1, r1, #1 /* one byte of the last loaded word is still unused */
471 b Lmemcpy_bl4
472
473 

AltStyle によって変換されたページ (->オリジナル) /