diff options
-rw-r--r--  newlib/ChangeLog                   |   8
-rw-r--r--  newlib/libc/machine/i386/strchr.S  | 113
2 files changed, 107 insertions, 14 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 7cdfd0356..35a80e266 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,9 @@ +2008-05-21 Eric Blake <ebb9@byu.net> + + Optimize strchr for x86. + * libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned + searches aren't penalized. Special-case searching for 0. + 2008-05-20 Nick Clifton <nickc@redhat.com> * libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak). @@ -5,7 +11,7 @@ 2008-05-20 DJ Delorie <dj@redhat.com> * libc/sys/sysnecv850/isatty.c (_isatty): Renamed from isatty. - + 2008-05-14 Jeff Johnston <jjohnstn@redhat.com> * libc/include/sys/reent.h: Change _REENT_INIT... macros to diff --git a/newlib/libc/machine/i386/strchr.S b/newlib/libc/machine/i386/strchr.S index fe425d2d8..1d98b8149 100644 --- a/newlib/libc/machine/i386/strchr.S +++ b/newlib/libc/machine/i386/strchr.S @@ -1,6 +1,6 @@ /* * ==================================================== - * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved. + * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice @@ -9,7 +9,7 @@ */ #include "i386mach.h" - + .global SYM (strchr) SOTYPE_FUNCTION(strchr) @@ -21,14 +21,45 @@ SYM (strchr): pushl ebx xorl ebx,ebx movl 8(ebp),edi - movb 12(ebp),bl + addb 12(ebp),bl + +#ifndef __OPTIMIZE_SIZE__ +/* Special case strchr(p,0). */ + je L25 -#ifndef __OPTIMIZE_SIZE__ -/* check if string is aligned, if not do check one byte at a time */ +/* Do byte-wise checks until string is aligned. 
*/ test $3,edi - jne L9 + je L5 + movl edi,eax + movb (eax),cl + testb cl,cl + je L14 + cmpb bl,cl + je L19 + incl edi + + test $3,edi + je L5 + movl edi,eax + movb (eax),cl + testb cl,cl + je L14 + cmpb bl,cl + je L19 + incl edi + + test $3,edi + je L5 + movl edi,eax + movb (eax),cl + testb cl,cl + je L14 + cmpb bl,cl + je L19 + incl edi /* create 4 byte mask which is just the desired byte repeated 4 times */ +L5: movl ebx,ecx sall $8,ebx subl $4,edi @@ -49,15 +80,14 @@ L10: testl $-2139062144,edx jne L9 - movl ebx,eax - xorl ecx,eax - leal -16843009(eax),edx - notl eax - andl eax,edx + xorl ebx,ecx + leal -16843009(ecx),edx + notl ecx + andl ecx,edx testl $-2139062144,edx je L10 #endif /* not __OPTIMIZE_SIZE__ */ - + /* loop while (*s && *s++ != c) */ L9: leal -1(edi),eax @@ -69,7 +99,7 @@ L15: je L14 cmpb bl,dl jne L15 - + L14: /* if (*s == c) return address otherwise return NULL */ cmpb bl,(eax) @@ -83,3 +113,60 @@ L19: leave ret +#ifndef __OPTIMIZE_SIZE__ +/* Special case strchr(p,0). */ +#if 0 + /* Hideous performance on modern machines. */ +L25: + cld + movl $-1,ecx + xor eax,eax + repnz + scasb + leal -1(edi),eax + jmp L19 +#endif +L25: +/* Do byte-wise checks until string is aligned. */ + test $3,edi + je L26 + movl edi,eax + movb (eax),cl + testb cl,cl + je L19 + incl edi + + test $3,edi + je L26 + movl edi,eax + movb (eax),cl + testb cl,cl + je L19 + incl edi + + test $3,edi + je L26 + movl edi,eax + movb (eax),cl + testb cl,cl + je L19 + incl edi + +L26: + subl $4,edi + +/* loop performing 4 byte mask checking for desired 0 byte */ + .p2align 4,,7 +L27: + addl $4,edi + movl (edi),ecx + leal -16843009(ecx),edx + movl ecx,eax + notl eax + andl eax,edx + testl $-2139062144,edx + je L27 + + jmp L9 + +#endif /* !__OPTIMIZE_SIZE__ */ |