Diffstat (limited to 'newlib/libc/machine/arm/strcmp.c')
-rw-r--r-- | newlib/libc/machine/arm/strcmp.c | 404
1 file changed, 404 insertions, 0 deletions
diff --git a/newlib/libc/machine/arm/strcmp.c b/newlib/libc/machine/arm/strcmp.c
new file mode 100644
index 000000000..f7d39aa77
--- /dev/null
+++ b/newlib/libc/machine/arm/strcmp.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arm_asm.h"
+#include <_ansi.h>
+#include <string.h>
+
+#ifdef __ARMEB__
+#define SHFT2LSB "lsl"
+#define SHFT2MSB "lsr"
+#define MSB "0x000000ff"
+#define LSB "0xff000000"
+#else
+#define SHFT2LSB "lsr"
+#define SHFT2MSB "lsl"
+#define MSB "0xff000000"
+#define LSB "0x000000ff"
+#endif
+
+#ifdef __thumb2__
+#define magic1(REG) "#0x01010101"
+#define magic2(REG) "#0x80808080"
+#else
+#define magic1(REG) #REG
+#define magic2(REG) #REG ", lsl #7"
+#endif
+
+int
+__attribute__((naked)) strcmp (const char* s1, const char* s2)
+{
+  asm(
+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
+      (defined (__thumb__) && !defined (__thumb2__)))
+      "optpld r0\n\t"
+      "optpld r1\n\t"
+      "eor r2, r0, r1\n\t"
+      "tst r2, #3\n\t"
+      /* Strings not at same byte offset from a word boundary.  */
+      "bne strcmp_unaligned\n\t"
+      "ands r2, r0, #3\n\t"
+      "bic r0, r0, #3\n\t"
+      "bic r1, r1, #3\n\t"
+      "ldr ip, [r0], #4\n\t"
+      "it eq\n\t"
+      "ldreq r3, [r1], #4\n\t"
+      "beq 1f\n\t"
+      /* Although s1 and s2 have identical initial alignment, they are
+         not currently word aligned.  Rather than comparing bytes,
+         make sure that any bytes fetched from before the addressed
+         bytes are forced to 0xff.  Then they will always compare
+         equal.  */
+      "eor r2, r2, #3\n\t"
+      "lsl r2, r2, #3\n\t"
+      "mvn r3, #"MSB"\n\t"
+      SHFT2LSB" r2, r3, r2\n\t"
+      "ldr r3, [r1], #4\n\t"
+      "orr ip, ip, r2\n\t"
+      "orr r3, r3, r2\n"
+      "1:\n\t"
+#ifndef __thumb2__
+      /* Load the 'magic' constant 0x01010101.  */
+      "str r4, [sp, #-4]!\n\t"
+      "mov r4, #1\n\t"
+      "orr r4, r4, r4, lsl #8\n\t"
+      "orr r4, r4, r4, lsl #16\n"
+#endif
+      ".p2align 2\n"
+      "4:\n\t"
+      "optpld r0, #8\n\t"
+      "optpld r1, #8\n\t"
+      "sub r2, ip, "magic1(r4)"\n\t"
+      "cmp ip, r3\n\t"
+      "itttt eq\n\t"
+      /* check for any zero bytes in first word */
+      "eoreq r2, r2, ip\n\t"
+      "tsteq r2, "magic2(r4)"\n\t"
+      "ldreq ip, [r0], #4\n\t"
+      "ldreq r3, [r1], #4\n\t"
+      "beq 4b\n"
+      "2:\n\t"
+      /* There's a zero or a different byte in the word */
+      SHFT2MSB" r0, ip, #24\n\t"
+      SHFT2LSB" ip, ip, #8\n\t"
+      "cmp r0, #1\n\t"
+      "it cs\n\t"
+      "cmpcs r0, r3, "SHFT2MSB" #24\n\t"
+      "it eq\n\t"
+      SHFT2LSB"eq r3, r3, #8\n\t"
+      "beq 2b\n\t"
+      "sub r0, r0, r3, "SHFT2MSB" #24\n\t"
+#ifndef __thumb2__
+      "ldr r4, [sp], #4\n\t"
+#endif
+      "RETURN"
+#elif (defined (__thumb__) && !defined (__thumb2__))
+      "1:\n\t"
+      "ldrb r2, [r0]\n\t"
+      "ldrb r3, [r1]\n\t"
+      "add r0, r0, #1\n\t"
+      "add r1, r1, #1\n\t"
+      "cmp r2, #0\n\t"
+      "beq 2f\n\t"
+      "cmp r2, r3\n\t"
+      "beq 1b\n\t"
+      "2:\n\t"
+      "sub r0, r2, r3\n\t"
+      "bx lr"
+#else
+      "3:\n\t"
+      "ldrb r2, [r0], #1\n\t"
+      "ldrb r3, [r1], #1\n\t"
+      "cmp r2, #1\n\t"
+      "it cs\n\t"
+      "cmpcs r2, r3\n\t"
+      "beq 3b\n\t"
+      "sub r0, r2, r3\n\t"
+      "RETURN"
+#endif
+      );
+}
+
+#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
+      (defined (__thumb__) && !defined (__thumb2__)))
+static int __attribute__((naked, used))
+strcmp_unaligned(const char* s1, const char* s2)
+{
+#if 0
+  /* The assembly code below is based on the following algorithm.  */
+#ifdef __ARMEB__
+#define RSHIFT <<
+#define LSHIFT >>
+#else
+#define RSHIFT >>
+#define LSHIFT <<
+#endif
+
+#define body(shift) \
+  mask = 0xffffffffU RSHIFT shift; \
+  w1 = *wp1++; \
+  w2 = *wp2++; \
+  do \
+    { \
+      t1 = w1 & mask; \
+      if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
+        { \
+          w2 RSHIFT= shift; \
+          break; \
+        } \
+      if (__builtin_expect(((w1 - b1) ^ w1) & (b1 << 7), 0)) \
+        { \
+          if ((((w1 - b1) ^ w1) & (b1 << 7)) & mask) \
+            w2 RSHIFT= shift; \
+          else \
+            { \
+              w2 = *wp2; \
+              t1 = w1 RSHIFT (32 - shift); \
+              w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
+            } \
+          break; \
+        } \
+      w2 = *wp2++; \
+      t1 ^= w1; \
+      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
+        { \
+          t1 = w1 >> (32 - shift); \
+          w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
+          break; \
+        } \
+      w1 = *wp1++; \
+    } while (1)
+
+  const unsigned* wp1;
+  const unsigned* wp2;
+  unsigned w1, w2;
+  unsigned mask;
+  unsigned shift;
+  unsigned b1 = 0x01010101;
+  char c1, c2;
+  unsigned t1;
+
+  while (((unsigned) s1) & 3)
+    {
+      c1 = *s1++;
+      c2 = *s2++;
+      if (c1 == 0 || c1 != c2)
+        return c1 - (int)c2;
+    }
+  wp1 = (unsigned*) (((unsigned)s1) & ~3);
+  wp2 = (unsigned*) (((unsigned)s2) & ~3);
+  t1 = ((unsigned) s2) & 3;
+  if (t1 == 1)
+    {
+      body(8);
+    }
+  else if (t1 == 2)
+    {
+      body(16);
+    }
+  else
+    {
+      body (24);
+    }
+
+  do
+    {
+#ifdef __ARMEB__
+      c1 = (char) t1 >> 24;
+      c2 = (char) w2 >> 24;
+#else
+      c1 = (char) t1;
+      c2 = (char) w2;
+#endif
+      t1 RSHIFT= 8;
+      w2 RSHIFT= 8;
+    } while (c1 != 0 && c1 == c2);
+  return c1 - c2;
+#endif
+
+  asm("wp1 .req r0\n\t"
+      "wp2 .req r1\n\t"
+      "b1 .req r2\n\t"
+      "w1 .req r4\n\t"
+      "w2 .req r5\n\t"
+      "t1 .req ip\n\t"
+      "@ r3 is scratch\n"
+
+      /* First of all, compare bytes until wp1(sp1) is word-aligned. */
+      "1:\n\t"
+      "tst wp1, #3\n\t"
+      "beq 2f\n\t"
+      "ldrb r2, [wp1], #1\n\t"
+      "ldrb r3, [wp2], #1\n\t"
+      "cmp r2, #1\n\t"
+      "it cs\n\t"
+      "cmpcs r2, r3\n\t"
+      "beq 1b\n\t"
+      "sub r0, r2, r3\n\t"
+      "RETURN\n"
+
+      "2:\n\t"
+      "str r5, [sp, #-4]!\n\t"
+      "str r4, [sp, #-4]!\n\t"
+      // "stmfd sp!, {r4, r5}\n\t"
+      "mov b1, #1\n\t"
+      "orr b1, b1, b1, lsl #8\n\t"
+      "orr b1, b1, b1, lsl #16\n\t"
+
+      "and t1, wp2, #3\n\t"
+      "bic wp2, wp2, #3\n\t"
+      "ldr w1, [wp1], #4\n\t"
+      "ldr w2, [wp2], #4\n\t"
+      "cmp t1, #2\n\t"
+      "beq 2f\n\t"
+      "bhi 3f\n"
+
+      /* Critical inner Loop: Block with 3 bytes initial overlap */
+      ".p2align 2\n"
+      "1:\n\t"
+      "bic t1, w1, #"MSB"\n\t"
+      "cmp t1, w2, "SHFT2LSB" #8\n\t"
+      "sub r3, w1, b1\n\t"
+      "eor r3, r3, w1\n\t"
+      "bne 4f\n\t"
+      "ands r3, r3, b1, lsl #7\n\t"
+      "it eq\n\t"
+      "ldreq w2, [wp2], #4\n\t"
+      "bne 5f\n\t"
+      "eor t1, t1, w1\n\t"
+      "cmp t1, w2, "SHFT2MSB" #24\n\t"
+      "bne 6f\n\t"
+      "ldr w1, [wp1], #4\n\t"
+      "b 1b\n"
+      "4:\n\t"
+      SHFT2LSB" w2, w2, #8\n\t"
+      "b 8f\n"
+
+      "5:\n\t"
+      "bics r3, r3, #"MSB"\n\t"
+      "bne 7f\n\t"
+      "ldrb w2, [wp2]\n\t"
+      SHFT2LSB" t1, w1, #24\n\t"
+#ifdef __ARMEB__
+      SHFT2LSB" w2, w2, #24\n\t"
+#endif
+      "b 8f\n"
+
+      "6:\n\t"
+      SHFT2LSB" t1, w1, #24\n\t"
+      "and w2, w2, #"LSB"\n\t"
+      "b 8f\n"
+
+      /* Critical inner Loop: Block with 2 bytes initial overlap */
+      ".p2align 2\n"
+      "2:\n\t"
+      SHFT2MSB" t1, w1, #16\n\t"
+      "sub r3, w1, b1\n\t"
+      SHFT2LSB" t1, t1, #16\n\t"
+      "eor r3, r3, w1\n\t"
+      "cmp t1, w2, "SHFT2LSB" #16\n\t"
+      "bne 4f\n\t"
+      "ands r3, r3, b1, lsl #7\n\t"
+      "it eq\n\t"
+      "ldreq w2, [wp2], #4\n\t"
+      "bne 5f\n\t"
+      "eor t1, t1, w1\n\t"
+      "cmp t1, w2, "SHFT2MSB" #16\n\t"
+      "bne 6f\n\t"
+      "ldr w1, [wp1], #4\n\t"
+      "b 2b\n"
+
+      "5:\n\t"
+      SHFT2MSB"s r3, r3, #16\n\t"
+      "bne 7f\n\t"
+      "ldrh w2, [wp2]\n\t"
+      SHFT2LSB" t1, w1, #16\n\t"
+#ifdef __ARMEB__
+      SHFT2LSB" w2, w2, #16\n\t"
+#endif
+      "b 8f\n"
+
+      "6:\n\t"
+      SHFT2MSB" w2, w2, #16\n\t"
+      SHFT2LSB" t1, w1, #16\n\t"
+      "4:\n\t"
+      SHFT2LSB" w2, w2, #16\n\t"
+      "b 8f\n\t"
+
+      /* Critical inner Loop: Block with 1 byte initial overlap */
+      ".p2align 2\n"
+      "3:\n\t"
+      "and t1, w1, #"LSB"\n\t"
+      "cmp t1, w2, "SHFT2LSB" #24\n\t"
+      "sub r3, w1, b1\n\t"
+      "eor r3, r3, w1\n\t"
+      "bne 4f\n\t"
+      "ands r3, r3, b1, lsl #7\n\t"
+      "it eq\n\t"
+      "ldreq w2, [wp2], #4\n\t"
+      "bne 5f\n\t"
+      "eor t1, t1, w1\n\t"
+      "cmp t1, w2, "SHFT2MSB" #8\n\t"
+      "bne 6f\n\t"
+      "ldr w1, [wp1], #4\n\t"
+      "b 3b\n"
+      "4:\n\t"
+      SHFT2LSB" w2, w2, #24\n\t"
+      "b 8f\n"
+      "5:\n\t"
+      "tst r3, #128\n\t"
+      "bne 7f\n\t"
+      "ldr w2, [wp2], #4\n"
+      "6:\n\t"
+      SHFT2LSB" t1, w1, #8\n\t"
+      "bic w2, w2, #"MSB"\n\t"
+      "b 8f\n"
+      "7:\n\t"
+      "mov r0, #0\n\t"
+      // "ldmfd sp!, {r4, r5}\n\t"
+      "ldr r4, [sp], #4\n\t"
+      "ldr r5, [sp], #4\n\t"
+      "RETURN\n"
+      "8:\n\t"
+      "and r2, t1, #"LSB"\n\t"
+      "and r0, w2, #"LSB"\n\t"
+      "cmp r0, #1\n\t"
+      "it cs\n\t"
+      "cmpcs r0, r2\n\t"
+      "itt eq\n\t"
+      SHFT2LSB"eq t1, t1, #8\n\t"
+      SHFT2LSB"eq w2, w2, #8\n\t"
+      "beq 8b\n\t"
+      "sub r0, r2, r0\n\t"
+      // "ldmfd sp!, {r4, r5}\n\t"
+      "ldr r4, [sp], #4\n\t"
+      "ldr r5, [sp], #4\n\t"
+      "RETURN");
+}
+
+#endif
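
The aligned fast path in this file turns on the "magic" constants 0x01010101 and 0x80808080: subtracting 0x01010101 from a loaded word, XOR-ing the result with the original word, and masking with 0x80808080 flags any word that may contain a NUL byte, which lets the loop compare four bytes per iteration. Below is a minimal C sketch of that test, for illustration only; it is not part of the commit, and the helper name may_contain_zero_byte is hypothetical.

  #include <stdint.h>

  /* Word-at-a-time test corresponding to the sub/eor/tst sequence in the
     fast path above.  A 0x00 byte always borrows during the subtraction,
     flipping its top bit, which the XOR then exposes.  The test can also
     fire on some non-zero bytes (e.g. 0x80), so the assembly falls back to
     a byte-by-byte comparison whenever it trips.  */
  static int
  may_contain_zero_byte (uint32_t w)
  {
    return (((w - 0x01010101u) ^ w) & 0x80808080u) != 0;
  }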