Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
422 | giacomo | 1 | #ifndef _I386_STRING_H_ |
2 | #define _I386_STRING_H_ |
||
3 | |||
4 | #ifdef __KERNEL__ |
||
5 | #include <linux/config.h> |
||
6 | /* |
||
7 | * On a 486 or Pentium, we are better off not using the |
||
8 | * byte string operations. But on a 386 or a PPro the |
||
9 | * byte string ops are faster than doing it by hand |
||
10 | * (MUCH faster on a Pentium). |
||
11 | */ |
||
12 | |||
13 | /* |
||
14 | * This string-include defines all string functions as inline |
||
15 | * functions. Use gcc. It also assumes ds=es=data space, this should be |
||
16 | * normal. Most of the string-functions are rather heavily hand-optimized, |
||
17 | * see especially strsep,strstr,str[c]spn. They should work, but are not |
||
18 | * very easy to understand. Everything is done entirely within the register |
||
19 | * set, making the functions fast and clean. String instructions have been |
||
20 | * used through-out, making for "slightly" unclear code :-) |
||
21 | * |
||
22 | * NO Copyright (C) 1991, 1992 Linus Torvalds, |
||
23 | * consider these trivial functions to be PD. |
||
24 | */ |
||
25 | |||
#define __HAVE_ARCH_STRCPY
/*
 * strcpy - copy the NUL-terminated string @src into @dest.
 * Returns @dest.  No bounds checking: @dest must be large enough.
 *
 * Simple lodsb/stosb byte loop; the loop exits only after the NUL
 * itself has been stored.  d0-d2 are dummy outputs telling gcc that
 * %esi, %edi and %eax are clobbered.
 */
static inline char * strcpy(char * dest,const char *src)
{
int d0, d1, d2;
__asm__ __volatile__(
	"1:\tlodsb\n\t"			/* AL = *ESI++ */
	"stosb\n\t"			/* *EDI++ = AL */
	"testb %%al,%%al\n\t"
	"jne 1b"			/* loop until NUL copied */
	: "=&S" (d0), "=&D" (d1), "=&a" (d2)
	:"0" (src),"1" (dest) : "memory");
return dest;
}
||
39 | |||
#define __HAVE_ARCH_STRNCPY
/*
 * strncpy - copy at most @count bytes of @src into @dest.
 * Returns @dest.
 *
 * Standard strncpy semantics: if @src is shorter than @count the
 * remainder of @dest is zero-filled (the "rep stosb" below runs with
 * AL still 0 after the NUL was copied); if @src is longer, @dest is
 * NOT NUL-terminated.
 */
static inline char * strncpy(char * dest,const char *src,size_t count)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"1:\tdecl %2\n\t"		/* count-- */
	"js 2f\n\t"			/* ran out of room -> done */
	"lodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"rep\n\t"			/* hit NUL: pad rest with 0 */
	"stosb\n"
	"2:"
	: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
	:"0" (src),"1" (dest),"2" (count) : "memory");
return dest;
}
||
58 | |||
#define __HAVE_ARCH_STRCAT
/*
 * strcat - append the string @src to the end of @dest.
 * Returns @dest.  No bounds checking.
 *
 * First "repne scasb" (AL=0, ECX=-1) scans for the terminating NUL of
 * @dest, then EDI is backed up one byte so the copy overwrites that
 * NUL, then the same byte loop as strcpy() appends @src.
 */
static inline char * strcat(char * dest,const char * src)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"			/* find NUL of dest */
	"decl %1\n"			/* point EDI at the NUL */
	"1:\tlodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b"
	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
	: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory");
return dest;
}
||
75 | |||
#define __HAVE_ARCH_STRNCAT
/*
 * strncat - append at most @count bytes of @src to @dest.
 * Returns @dest.
 *
 * Finds the end of @dest like strcat(), loads @count (operand %8,
 * the "g" input) into ECX, copies until @count bytes are appended or
 * the NUL of @src is reached, then ALWAYS stores a terminating NUL
 * ("xorl %2,%2" zeroes EAX before the final stosb) - standard
 * strncat semantics.
 */
static inline char * strncat(char * dest,const char * src,size_t count)
{
int d0, d1, d2, d3;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"			/* find NUL of dest */
	"decl %1\n\t"
	"movl %8,%3\n"			/* ECX = count */
	"1:\tdecl %3\n\t"
	"js 2f\n\t"			/* count exhausted */
	"lodsb\n\t"
	"stosb\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n"
	"2:\txorl %2,%2\n\t"		/* AL = 0 */
	"stosb"				/* terminate result */
	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
	: "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
	: "memory");
return dest;
}
||
98 | |||
#define __HAVE_ARCH_STRCMP
/*
 * strcmp - compare two NUL-terminated strings.
 * Returns exactly -1, 0 or 1 (not an arbitrary difference):
 * the "sbbl %eax,%eax" after a mismatching scasb yields 0 or -1
 * from the carry flag, and "orb $1,%al" turns that into 1 or -1.
 *
 * NOTE(review): no "memory" clobber/input here - gcc is not told the
 * asm reads *cs/*ct; appears to rely on callers' stores being visible
 * anyway - confirm before reusing this pattern.
 */
static inline int strcmp(const char * cs,const char * ct)
{
int d0, d1;
register int __res;
__asm__ __volatile__(
	"1:\tlodsb\n\t"			/* AL = *cs++ */
	"scasb\n\t"			/* compare with *ct++ */
	"jne 2f\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"			/* equal so far, not at NUL */
	"xorl %%eax,%%eax\n\t"		/* strings equal -> 0 */
	"jmp 3f\n"
	"2:\tsbbl %%eax,%%eax\n\t"	/* carry -> -1, else 0 */
	"orb $1,%%al\n"			/* -> -1 or 1 */
	"3:"
	:"=a" (__res), "=&S" (d0), "=&D" (d1)
	:"1" (cs),"2" (ct));
return __res;
}
||
119 | |||
#define __HAVE_ARCH_STRNCMP
/*
 * strncmp - compare at most @count bytes of two strings.
 * Returns -1, 0 or 1, using the same sbb/or sign trick as strcmp().
 * Stops early at a NUL or at the first mismatching byte.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
register int __res;
int d0, d1, d2;
__asm__ __volatile__(
	"1:\tdecl %3\n\t"		/* count-- */
	"js 2f\n\t"			/* limit reached -> equal */
	"lodsb\n\t"
	"scasb\n\t"
	"jne 3f\n\t"			/* bytes differ */
	"testb %%al,%%al\n\t"
	"jne 1b\n"			/* loop until NUL */
	"2:\txorl %%eax,%%eax\n\t"	/* equal -> 0 */
	"jmp 4f\n"
	"3:\tsbbl %%eax,%%eax\n\t"	/* carry -> -1, else 0 */
	"orb $1,%%al\n"			/* -> -1 or 1 */
	"4:"
	:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
	:"1" (cs),"2" (ct),"3" (count));
return __res;
}
||
142 | |||
#define __HAVE_ARCH_STRCHR
/*
 * strchr - find the first occurrence of byte @c in string @s.
 * Returns a pointer to the match, or NULL if not found.  The NUL
 * terminator counts as part of the string (the cmpb runs before the
 * NUL test), so strchr(s, 0) returns a pointer to the terminator.
 *
 * Trick at the end: on a match ESI points one PAST the byte, so the
 * result is ESI-1; on failure ESI is forced to 1 so that the same
 * "decl" yields NULL.
 */
static inline char * strchr(const char * s, int c)
{
int d0;
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"		/* AH = byte to find */
	"1:\tlodsb\n\t"
	"cmpb %%ah,%%al\n\t"
	"je 2f\n\t"			/* found (may be the NUL) */
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"movl $1,%1\n"			/* not found: make result NULL */
	"2:\tmovl %1,%0\n\t"
	"decl %0"			/* ESI-1 = match (or 0) */
	:"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
return __res;
}
||
161 | |||
#define __HAVE_ARCH_STRRCHR
/*
 * strrchr - find the last occurrence of byte @c in string @s.
 * Returns a pointer to the last match, or NULL (the result register
 * starts as 0 via the "0" (0) input) if @c does not occur.
 *
 * Single forward scan: every time the current byte matches, the
 * candidate result is updated to ESI-1 (lodsb already advanced ESI);
 * the scan ends at the NUL, leaving the LAST match recorded.
 */
static inline char * strrchr(const char * s, int c)
{
int d0, d1;
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"		/* AH = byte to find */
	"1:\tlodsb\n\t"
	"cmpb %%ah,%%al\n\t"
	"jne 2f\n\t"
	"leal -1(%%esi),%0\n"		/* remember latest match */
	"2:\ttestb %%al,%%al\n\t"
	"jne 1b"
	:"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
return __res;
}
||
178 | |||
#define __HAVE_ARCH_STRLEN
/*
 * strlen - return the length of @s, excluding the NUL terminator.
 *
 * "repne scasb" with AL=0 and ECX=0xffffffff scans for the NUL,
 * decrementing ECX per byte examined (including the NUL itself).
 * ~ECX is then bytes-scanned, and the final decl removes the NUL
 * from the count: len = ~ECX - 1.
 */
static inline size_t strlen(const char * s)
{
int d0;
register int __res;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"notl %0\n\t"			/* ECX = bytes scanned */
	"decl %0"			/* exclude the NUL */
	:"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu));
return __res;
}
||
192 | |||
/*
 * __memcpy - copy @n bytes from @from to @to (non-overlapping).
 * Returns @to.
 *
 * Copies n/4 dwords with "rep movsl", then uses bit 1 and bit 0 of
 * the byte count (operand %b4, the low byte of the "q"-constrained
 * copy of @n) to decide whether a trailing movsw and/or movsb is
 * needed for the last 0-3 bytes.
 */
static inline void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
__asm__ __volatile__(
	"rep ; movsl\n\t"		/* copy n/4 dwords */
	"testb $2,%b4\n\t"		/* n & 2 ? */
	"je 1f\n\t"
	"movsw\n"
	"1:\ttestb $1,%b4\n\t"		/* n & 1 ? */
	"je 2f\n\t"
	"movsb\n"
	"2:"
	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
	: "memory");
return (to);
}
||
210 | |||
/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */
/*
 * __constant_memcpy - memcpy for a compile-time-constant @n.
 * Returns @to.
 *
 * Small copies (n <= 128) are delegated to __builtin_memcpy so gcc
 * can expand them inline.  Larger ones do "rep movsl" for n/4 dwords
 * and, because n is constant, exactly one of the four tail variants
 * below survives compilation (movsb / movsw / both / none).
 */
static inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
	if (n <= 128)
		return __builtin_memcpy(to, from, n);

/* dword copy plus a constant-selected tail; d0-d2 mark ECX/EDI/ESI clobbered */
#define COMMON(x) \
__asm__ __volatile__( \
	"rep ; movsl" \
	x \
	: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
	: "0" (n/4),"1" ((long) to),"2" ((long) from) \
	: "memory");
{
	int d0, d1, d2;
	switch (n % 4) {		/* constant: only one case is emitted */
		case 0: COMMON(""); return to;
		case 1: COMMON("\n\tmovsb"); return to;
		case 2: COMMON("\n\tmovsw"); return to;
		default: COMMON("\n\tmovsw\n\tmovsb"); return to;
	}
}

#undef COMMON
}
||
239 | |||
240 | #define __HAVE_ARCH_MEMCPY |
||
241 | |||
242 | #ifdef CONFIG_X86_USE_3DNOW |
||
243 | |||
244 | #include <asm/mmx.h> |
||
245 | |||
246 | /* |
||
247 | * This CPU favours 3DNow strongly (eg AMD Athlon) |
||
248 | */ |
||
249 | |||
/*
 * __constant_memcpy3d - memcpy for a constant @len on 3DNow! CPUs.
 * Returns @to.  Short copies stay on the plain rep-movs path, where
 * the MMX setup cost is not worth it; big ones use _mmx_memcpy().
 */
static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
{
	return (len < 512) ? __constant_memcpy(to, from, len)
			   : _mmx_memcpy(to, from, len);
}
||
256 | |||
/*
 * __memcpy3d - memcpy for a variable @len on 3DNow! CPUs.
 * Returns @to.  Same 512-byte cut-over as __constant_memcpy3d():
 * below it, plain __memcpy(); at or above it, _mmx_memcpy().
 */
static __inline__ void *__memcpy3d(void *to, const void *from, size_t len)
{
	return (len < 512) ? __memcpy(to, from, len)
			   : _mmx_memcpy(to, from, len);
}
||
263 | |||
/*
 * memcpy() entry point, 3DNow! flavour: constant sizes go through
 * __constant_memcpy3d() so the compiler can expand small copies,
 * variable sizes through __memcpy3d().
 */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy3d((t),(f),(n)) : \
 __memcpy3d((t),(f),(n)))

#else

/*
 * No 3D Now!
 */

/*
 * Plain x86 memcpy(): constant sizes expand inline via
 * __constant_memcpy(), everything else uses __memcpy().
 */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

#endif
||
281 | |||
/*
 * struct_cpy(x,y), copy structure *y into (matching structure) *x.
 *
 * We get link-time errors if the structure sizes do not match:
 * the sizeof comparison is a compile-time constant, so the call to
 * the deliberately-undefined function __struct_cpy_bug() is only
 * emitted - and only fails to link - when the sizes differ.
 * There is no runtime overhead, it's all optimized away at
 * compile time.
 */
extern void __struct_cpy_bug (void);

#define struct_cpy(x,y) 		\
({					\
	if (sizeof(*(x)) != sizeof(*(y))) 	\
		__struct_cpy_bug();	\
	memcpy(x, y, sizeof(*(x)));	\
})
||
297 | |||
#define __HAVE_ARCH_MEMMOVE
/*
 * memmove - copy @n bytes from @src to @dest, regions may overlap.
 * Returns @dest.
 *
 * If @dest is below @src a forward "rep movsb" is safe; otherwise
 * copy backwards: set the direction flag (std), start ESI/EDI at the
 * LAST byte of each region, rep movsb, then restore forward copying
 * with cld.
 *
 * NOTE(review): comparing two pointers into unrelated objects is
 * formally unspecified in C; this relies on a flat address space.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
int d0, d1, d2;
if (dest<src)
__asm__ __volatile__(
	"rep\n\t"
	"movsb"				/* forward byte copy */
	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
	:"0" (n),"1" (src),"2" (dest)
	: "memory");
else
__asm__ __volatile__(
	"std\n\t"			/* copy downwards */
	"rep\n\t"
	"movsb\n\t"
	"cld"				/* restore direction flag */
	: "=&c" (d0), "=&S" (d1), "=&D" (d2)
	:"0" (n),
	 "1" (n-1+(const char *)src),	/* last byte of src */
	 "2" (n-1+(char *)dest)		/* last byte of dest */
	:"memory");
return dest;
}
||
322 | |||
/* No hand-written memcmp; gcc's builtin is used directly. */
#define memcmp __builtin_memcmp
||
324 | |||
#define __HAVE_ARCH_MEMCHR
/*
 * memchr - find the first byte equal to @c in the first @count bytes
 * of @cs.  Returns a pointer to the match, or NULL.
 *
 * @count == 0 is special-cased because "rep" with ECX==0 would skip
 * the scan but the fixup below would still fabricate a result.
 * On a hit EDI is one past the byte, so decl yields the match; on a
 * miss EDI is forced to 1 first so the same decl yields NULL.
 */
static inline void * memchr(const void * cs,int c,size_t count)
{
int d0;
register void * __res;
if (!count)
	return NULL;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"			/* scan for AL == c */
	"je 1f\n\t"			/* found */
	"movl $1,%0\n"			/* miss: make result NULL */
	"1:\tdecl %0"
	:"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
return __res;
}
||
341 | |||
/*
 * __memset_generic - fill @count bytes at @s with byte @c.
 * Returns @s.  Plain "rep stosb"; the dword-at-a-time variants
 * below are preferred when more is known at compile time.
 */
static inline void * __memset_generic(void * s, char c,size_t count)
{
int d0, d1;
__asm__ __volatile__(
	"rep\n\t"
	"stosb"				/* *EDI++ = AL, count times */
	: "=&c" (d0), "=&D" (d1)
	:"a" (c),"1" (s),"0" (count)
	:"memory");
return s;
}
||
353 | |||
/* we might want to write optimized versions of these later */
#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))

/*
 * memset(x,0,y) is a reasonably common thing to do, so we want to fill
 * things 32 bits at a time even when we don't know the size of the
 * area at compile-time..
 */
/*
 * __constant_c_memset - fill @count bytes at @s with the 32-bit
 * pattern @c.  Returns @s.  @c must already be the fill byte
 * replicated into all four byte lanes (the memset() macro below
 * multiplies by 0x01010101UL).
 *
 * Stores count/4 dwords with "rep stosl", then bit 1 / bit 0 of the
 * byte count (%b3, low byte of the "q" copy of @count) select a
 * trailing stosw and/or stosb for the last 0-3 bytes.
 */
static inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
{
int d0, d1;
__asm__ __volatile__(
	"rep ; stosl\n\t"		/* store count/4 dwords */
	"testb $2,%b3\n\t"		/* count & 2 ? */
	"je 1f\n\t"
	"stosw\n"
	"1:\ttestb $1,%b3\n\t"		/* count & 1 ? */
	"je 2f\n\t"
	"stosb\n"
	"2:"
	: "=&c" (d0), "=&D" (d1)
	:"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
	:"memory");
return (s);
}
||
379 | |||
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
/*
 * strnlen - return the length of @s, but scan at most @count bytes.
 * Returns strlen(s) if a NUL occurs within @count bytes, otherwise
 * @count.
 *
 * EAX walks the string; EDX counts down and the scan stops when it
 * would pass -1 (i.e. after @count bytes) or at a NUL.  The final
 * "subl %2,%0" converts the end pointer back into a length.
 */
static inline size_t strnlen(const char * s, size_t count)
{
int d0;
register int __res;
__asm__ __volatile__(
	"movl %2,%0\n\t"		/* EAX = s */
	"jmp 2f\n"
	"1:\tcmpb $0,(%0)\n\t"		/* NUL reached? */
	"je 3f\n\t"
	"incl %0\n"
	"2:\tdecl %1\n\t"		/* count-- */
	"cmpl $-1,%1\n\t"
	"jne 1b\n"
	"3:\tsubl %2,%0"		/* length = ptr - s */
	:"=a" (__res), "=&d" (d0)
	:"c" (s),"1" (count));
return __res;
}
||
401 | |||
#define __HAVE_ARCH_STRSTR

/* strstr() is not inlined; the extern definition lives elsewhere. */
extern char *strstr(const char *cs, const char *ct);
405 | |||
/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as we by now know that both pattern and count is constant..
 */
/*
 * __constant_c_and_count_memset - memset where both the (already
 * byte-replicated, see memset() below) 32-bit @pattern and @count
 * are compile-time constants.  Returns @s.
 *
 * Counts 0..4 become direct stores; anything larger is "rep stosl"
 * plus a tail chosen by the constant count%4, so exactly one COMMON()
 * variant survives compilation.
 */
static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
{
	switch (count) {
		case 0:
			return s;
		case 1:
			*(unsigned char *)s = pattern;
			return s;
		case 2:
			*(unsigned short *)s = pattern;
			return s;
		case 3:
			*(unsigned short *)s = pattern;
			*(2+(unsigned char *)s) = pattern;	/* third byte */
			return s;
		case 4:
			*(unsigned long *)s = pattern;
			return s;
	}
/* dword fill plus a constant-selected tail; no trailing ';' - call sites add it */
#define COMMON(x) \
__asm__ __volatile__( \
	"rep ; stosl" \
	x \
	: "=&c" (d0), "=&D" (d1) \
	: "a" (pattern),"0" (count/4),"1" ((long) s) \
	: "memory")
{
	int d0, d1;
	switch (count % 4) {		/* constant: one case emitted */
		case 0: COMMON(""); return s;
		case 1: COMMON("\n\tstosb"); return s;
		case 2: COMMON("\n\tstosw"); return s;
		default: COMMON("\n\tstosw\n\tstosb"); return s;
	}
}

#undef COMMON
}
||
448 | |||
/*
 * Constant-c memset: pick the fully-constant variant when the count
 * is also known at compile time, else the replicated-pattern filler.
 */
#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

/* Variable-c memset: both arms currently end up in __memset_generic(). */
#define __memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

#define __HAVE_ARCH_MEMSET
/*
 * memset() entry point.  When the fill byte is constant it is
 * replicated into all four byte lanes (0x01010101UL * c) so the
 * helpers can store 32 bits at a time.
 */
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
 __memset((s),(c),(count)))
||
464 | |||
/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
#define __HAVE_ARCH_MEMSCAN
/*
 * memscan - scan @size bytes at @addr for byte @c.
 * Returns a pointer to the first match, or @addr + @size if @c does
 * not occur (note: NOT NULL - that is memscan's contract).
 *
 * After "repnz scasb", ZF is set on a hit, in which case EDI (one
 * past the byte) is decremented to point at the match; on a miss the
 * jnz skips the decrement, leaving EDI == addr + size.
 */
static inline void * memscan(void * addr, int c, size_t size)
{
if (!size)
	return addr;
__asm__("repnz; scasb\n\t"
	"jnz 1f\n\t"			/* no match: keep end pointer */
	"dec %%edi\n"			/* match: back up onto it */
	"1:"
	: "=D" (addr), "=c" (size)
	: "0" (addr), "1" (size), "a" (c));
return addr;
}
||
481 | |||
482 | #endif /* __KERNEL__ */ |
||
483 | |||
484 | #endif |