Super User's BSD Cross Reference: /OpenBSD/sys/lib/libkern/arch/sh/memset.S

1 /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <machine/asm.h>
30
/*
 * Register aliases used by the fill routine in this file.
 *
 * REG_PTR  (r0): end pointer (dst + len) on the large-fill path.  r0 is
 *                also used directly as scratch for "tst #imm", "mova"
 *                and the computed jumps on the small-fill paths.
 * REG_TMP1 (r1): scratch (thresholds, alignment masks, second base
 *                pointer for the byte store table).
 */
31#define REG_PTR r0
32#define REG_TMP1 r1
33
/*
 * bzero build: REG_C is not an input; the routine loads 0 into it.
 * memset build: REG_DST0 keeps a copy of the original destination so it
 * can be handed back in r0 on return.
 * NOTE(review): r4/r5/r6 presumably hold the incoming C arguments per the
 * SH calling convention -- confirm against the ABI document.
 */
34#ifdef BZERO
35# define REG_C r2
36# define REG_DST r4
37# define REG_LEN r5
38#else
39# define REG_DST0 r3
40# define REG_DST r4
41# define REG_C r5
42# define REG_LEN r6
43#endif
44
/*
 * Fill REG_LEN bytes starting at REG_DST with the low byte of REG_C.
 * Built with BZERO defined this is bzero (REG_C is forced to 0 here);
 * otherwise it is memset, which returns the original destination in r0.
 *
 * The length selects one of three strategies:
 *   len >= 28       "large": align both ends, then 32-byte and 4-byte
 *                   longword stores working backwards from dst + len.
 *   12 <= len < 28  "small": computed jump into an unrolled table of
 *                   byte stores (odd dst) or word stores (even dst).
 *   len < 12        byte loop, storing backwards from dst + len.
 *
 * Reading note: bt/s, bf/s, bra, jmp and rts are delayed branches -- the
 * instruction written immediately after each of them (its delay slot)
 * executes before the branch takes effect, whether or not a conditional
 * branch is taken.  Several labels and #ifdef's below sit on delay slots.
 */
45#ifdef BZERO
46 ENTRY(bzero)
47#else
48 ENTRY(memset)
49 mov REG_DST,REG_DST0 /* for return value */
50#endif
51 /* small amount to fill ? */
52 mov #28,REG_TMP1
53 cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
54 bt/s large
55 mov #12,REG_TMP1 /* if (len >= 12) goto small; */
56 cmp/hs REG_TMP1,REG_LEN
57 bt/s small
/*
 * Delay slot of "bt/s small": for bzero it is the mov #0 below, for
 * memset it is the tst that starts the byte loop (harmless when the
 * branch is taken, since "small" re-tests before using T).
 */
58#ifdef BZERO
59 mov #0,REG_C
60#endif
61 /* very little fill (0 ~ 11 bytes) */
/*
 * T = (len == 0).  REG_LEN is then turned into the end pointer
 * (dst + len), and REG_DST is bumped to dst + 1 (in the delay slot) so
 * that "cmp/eq REG_DST,REG_LEN" is true exactly when the next
 * pre-decrement store will write the first byte of the buffer.
 */
62 tst REG_LEN,REG_LEN
63 add REG_DST,REG_LEN
64 bt/s done
65 add #1,REG_DST

66
67 /* unroll 4 loops */
/*
 * Each step: store one byte at --end, then leave if the compare made
 * before that store said it was the last one; the compare for the next
 * step sits in the branch delay slot (mov.b does not change T).
 */
68 cmp/eq REG_DST,REG_LEN
69 1: mov.b REG_C,@-REG_LEN
70 bt/s done
71 cmp/eq REG_DST,REG_LEN
72 mov.b REG_C,@-REG_LEN
73 bt/s done
74 cmp/eq REG_DST,REG_LEN
75 mov.b REG_C,@-REG_LEN
76 bt/s done
77 cmp/eq REG_DST,REG_LEN
78 mov.b REG_C,@-REG_LEN
79 bf/s 1b
80 cmp/eq REG_DST,REG_LEN
81 done:
82#ifdef BZERO
83 rts
84 nop
85#else
/* memset: hand back the original dst (loaded in the rts delay slot). */
86 rts
87 mov REG_DST0,r0
88#endif
89
90
/*
 * small: 12 <= len < 28.  An even dst goes to small_aligned (word
 * stores); an odd dst is filled with byte stores below.
 */
91 small:
92 mov REG_DST,r0
93 tst #1,r0
94 bt/s small_aligned
95 mov REG_DST,REG_TMP1
/*
 * Odd dst: jump to (1f - 2 * len).  Every table entry is one 2-byte
 * instruction storing one byte, so exactly the last len entries before
 * 1f run and fill dst[len-1] .. dst[0].  The 4-bit displacement of
 * mov.b only reaches offsets 0..15, hence the second base
 * REG_TMP1 = dst + 16 for offsets 16..31.  The fill value is moved into
 * r0 in the jmp delay slot because this mov.b form stores from r0.
 */
96 shll REG_LEN
97 mova 1f,r0 /* 1f must be 4bytes aligned! */
98 add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
99 sub REG_LEN,r0
100 jmp @r0
101 mov REG_C,r0
102
103 .align 2
104 mov.b r0,@(15,REG_TMP1)
105 mov.b r0,@(14,REG_TMP1)
106 mov.b r0,@(13,REG_TMP1)
107 mov.b r0,@(12,REG_TMP1)
108 mov.b r0,@(11,REG_TMP1)
109 mov.b r0,@(10,REG_TMP1)
110 mov.b r0,@(9,REG_TMP1)
111 mov.b r0,@(8,REG_TMP1)
112 mov.b r0,@(7,REG_TMP1)
113 mov.b r0,@(6,REG_TMP1)
114 mov.b r0,@(5,REG_TMP1)
115 mov.b r0,@(4,REG_TMP1)
116 mov.b r0,@(3,REG_TMP1)
117 mov.b r0,@(2,REG_TMP1)
118 mov.b r0,@(1,REG_TMP1)
119 mov.b r0,@REG_TMP1
120 mov.b r0,@(15,REG_DST)
121 mov.b r0,@(14,REG_DST)
122 mov.b r0,@(13,REG_DST)
123 mov.b r0,@(12,REG_DST)
124 mov.b r0,@(11,REG_DST)
125 mov.b r0,@(10,REG_DST)
126 mov.b r0,@(9,REG_DST)
127 mov.b r0,@(8,REG_DST)
128 mov.b r0,@(7,REG_DST)
129 mov.b r0,@(6,REG_DST)
130 mov.b r0,@(5,REG_DST)
131 mov.b r0,@(4,REG_DST)
132 mov.b r0,@(3,REG_DST)
133 mov.b r0,@(2,REG_DST)
134 mov.b r0,@(1,REG_DST)
/*
 * Both variants keep 32 instructions between the .align and 1:, so the
 * label stays 4-byte aligned.  bzero puts the store to dst[0] in the
 * rts delay slot (the rts counts as one of the len entries); memset
 * needs that delay slot to load the return value instead.
 */
135#ifdef BZERO
136 rts
137 1: mov.b r0,@REG_DST
138#else
139 mov.b r0,@REG_DST
140 1: rts
141 mov REG_DST0,r0
142#endif
143
144
145 /* 2 bytes aligned small fill */
146 small_aligned:
/*
 * memset only: replicate the fill byte into the low 16 bits of REG_C.
 * For bzero REG_C is already 0 (set in the delay slot of "bt/s small").
 */
147#ifndef BZERO
148 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
149 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
150 or REG_TMP1,REG_C /* REG_C = ????xxxx */
151#endif
152
/*
 * If len is odd, store the final byte dst[len-1] separately and drop it
 * from len so that the rest is a whole number of 16-bit words.  The
 * "add #-1,r0" is a delay slot and runs either way; when the branch is
 * taken r0 is simply overwritten by the mova below.
 */
153 mov REG_LEN,r0
154 tst #1,r0 /* len is aligned? */
155 bt/s 1f
156 add #-1,r0
157 mov.b REG_C,@(r0,REG_DST) /* fill the last byte */
158 mov r0,REG_LEN
159 1:
160
/*
 * Jump to (1f - len): every table entry is a 2-byte instruction that
 * stores 2 bytes, so len bytes of code fill exactly len bytes of data.
 */
161 mova 1f,r0 /* 1f must be 4bytes aligned! */
162 sub REG_LEN,r0
163 jmp @r0
164 mov REG_C,r0
165
166 .align 2
167 mov.w r0,@(30,REG_DST)
168 mov.w r0,@(28,REG_DST)
169 mov.w r0,@(26,REG_DST)
170 mov.w r0,@(24,REG_DST)
171 mov.w r0,@(22,REG_DST)
172 mov.w r0,@(20,REG_DST)
173 mov.w r0,@(18,REG_DST)
174 mov.w r0,@(16,REG_DST)
175 mov.w r0,@(14,REG_DST)
176 mov.w r0,@(12,REG_DST)
177 mov.w r0,@(10,REG_DST)
178 mov.w r0,@(8,REG_DST)
179 mov.w r0,@(6,REG_DST)
180 mov.w r0,@(4,REG_DST)
181 mov.w r0,@(2,REG_DST)
/* Same rts / delay-slot arrangement as the byte table above. */
182#ifdef BZERO
183 rts
184 1: mov.w r0,@REG_DST
185#else
186 mov.w r0,@REG_DST
187 1: rts
188 mov REG_DST0,r0
189#endif
190
191
192
/*
 * large: len >= 28.  Build a 32-bit fill pattern, then fill backwards
 * from REG_PTR = dst + len using longword stores once both ends are
 * 4-byte aligned.
 */
193 .align 2
194 large:
195#ifdef BZERO
196 mov #0,REG_C
197#else
198 extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
199 shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
200 or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
201 swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
202 xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
203#endif
204
/*
 * if (dst & 3) goto unaligned_dst; else if (len & 3) goto unaligned_len.
 * REG_PTR = dst + len is completed in the first delay slot.  The delay
 * slot of "bf/s unaligned_len" is the "mov #32,REG_TMP1" at aligned:
 * below; it is harmless on the taken path because REG_TMP1 is not read
 * there and is reloaded when control comes back to aligned:.
 */
205 mov #3,REG_TMP1
206 tst REG_TMP1,REG_DST
207 mov REG_DST,REG_PTR
208 bf/s unaligned_dst
209 add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
210 tst REG_TMP1,REG_LEN
211 bf/s unaligned_len
212
/*
 * aligned: REG_DST and REG_PTR are both 4-byte aligned and
 * REG_LEN == REG_PTR - REG_DST.
 */
213 aligned:
214 /* fill 32*n bytes */
215 mov #32,REG_TMP1
216 cmp/hi REG_LEN,REG_TMP1
217 bt 9f
/*
 * while (len >= 32) { ptr -= 32; store 8 longwords; len -= 32; }
 * The loop test (32 > len) is computed mid-body; the last store sits in
 * the bf/s delay slot.
 */
218 .align 2
219 1: sub REG_TMP1,REG_PTR
220 mov.l REG_C,@REG_PTR
221 sub REG_TMP1,REG_LEN
222 mov.l REG_C,@(4,REG_PTR)
223 cmp/hi REG_LEN,REG_TMP1
224 mov.l REG_C,@(8,REG_PTR)
225 mov.l REG_C,@(12,REG_PTR)
226 mov.l REG_C,@(16,REG_PTR)
227 mov.l REG_C,@(20,REG_PTR)
228 mov.l REG_C,@(24,REG_PTR)
229 bf/s 1b
230 mov.l REG_C,@(28,REG_PTR)
231 9:
232
233 /* fill left 4*n bytes */
/*
 * Nothing left if ptr == dst.  Otherwise REG_DST is bumped to dst + 4
 * so "cmp/eq REG_DST,REG_PTR" is true exactly when the next
 * pre-decrement store writes the first longword; same unrolled
 * store / test-in-delay-slot pattern as the byte loop near the top.
 */
234 cmp/eq REG_DST,REG_PTR
235 bt 9f
236 add #4,REG_DST
237 cmp/eq REG_DST,REG_PTR
238 1: mov.l REG_C,@-REG_PTR
239 bt/s 9f
240 cmp/eq REG_DST,REG_PTR
241 mov.l REG_C,@-REG_PTR
242 bt/s 9f
243 cmp/eq REG_DST,REG_PTR
244 mov.l REG_C,@-REG_PTR
245 bt/s 9f
246 cmp/eq REG_DST,REG_PTR
247 mov.l REG_C,@-REG_PTR
248 bf/s 1b
249 cmp/eq REG_DST,REG_PTR
250 9:
251#ifdef BZERO
252 rts
253 nop
254#else
255 rts
256 mov REG_DST0,r0
257#endif
258
259
/*
 * unaligned_dst: advance REG_DST to a 4-byte boundary by storing one
 * byte and/or one 16-bit word at the front.  REG_TMP1 goes from 1 to 2
 * so the same register serves as the mask for both tests.
 */
260 unaligned_dst:
261 mov #1,REG_TMP1
262 tst REG_TMP1,REG_DST /* if (dst & 1) { */
263 add #1,REG_TMP1
264 bt/s 2f
265 tst REG_TMP1,REG_DST
266 mov.b REG_C,@REG_DST /* *dst++ = c; */
267 add #1,REG_DST
/* dst changed, so bit 1 must be tested again */
268 tst REG_TMP1,REG_DST
269 2: /* } */
270 /* if (dst & 2) { */
271 bt 4f
272 mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */
273 add #2,REG_DST
274 4: /* } */
275
276
/*
 * Now align the end pointer.  The delay slot of the "bt/s 4f" below is
 * the tst at unaligned_len:, so on fall-through execution continues at
 * the "bt/s 2f" with T already holding the (ptr & 1) test.  The "large"
 * path enters at unaligned_len: directly when only the length is
 * unaligned.
 */
277 tst #3,REG_PTR /* if (ptr & 3) { */
278 bt/s 4f /* */
279 unaligned_len:
280 tst #1,REG_PTR /* if (ptr & 1) { */
281 bt/s 2f
282 tst #2,REG_PTR
283 mov.b REG_C,@-REG_PTR /* *--ptr = c; */
284 2: /* } */
285 /* if (ptr & 2) { */
/*
 * T still comes from the "tst #2" above: taking 1 off an odd ptr does
 * not change bit 1, so no re-test is needed here.
 */
286 bt 4f
287 mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */
288 4: /* } */
289 /* } */
290
/* len = ptr - dst (sub runs in the bra delay slot); goto aligned */
291 mov REG_PTR,REG_LEN
292 bra aligned
293 sub REG_DST,REG_LEN
294
295 

AltStyle によって変換されたページ (->オリジナル) /