summaryrefslogtreecommitdiffstats
path: root/newlib/libc/machine/arm/strcmp-armv7m.S
blob: cdb4912df7a1b262c22186b3a169c9defc93879f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Very similar to the generic code, but uses Thumb2 as implemented
   in ARMv7-M.  */

/* Parameters and result.  src1 and src2 are used as load addresses and
   are advanced as the strings are consumed; the value returned to the
   caller is left in result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.
   data1/data2 receive the bytes or words most recently loaded through
   src1/src2.  tmp2 lives in r5, which the code pushes on the stack
   before its first use and pops again before returning.  tmp1 and
   syndrome share r12: tmp1 is only used by the entry code while it
   sorts out alignment, syndrome (the zero-byte detector value) only
   after that.  */
#define data1		r2
#define data2		r3
#define tmp2		r5
#define tmp1		r12
#define syndrome	r12	/* Overlaps tmp1 */

	.thumb
	.syntax unified
/* strcmp -- compare two NUL-terminated byte strings.

   In:   src1 (r0), src2 (r1) = addresses of the two strings.
   Out:  result (r0) = 0 if the strings are equal; otherwise non-zero,
	 computed from the first differing byte pair as (byte of string 1)
	 - (byte of string 2) with both bytes zero-extended.
   Uses: r2, r3 and r12 as scratch; r5 only on the .Lstrcmp_unaligned
	 path, which saves and restores it on the stack.

   Both pointers are rounded down to a word boundary before the first
   word load, so up to three bytes in front of each string may be read
   (always from the aligned word that contains the first string byte).

   def_fn, S2LO, S2HI, S2LOEQ and MSB are not defined in this file.
   NOTE(review): the comments below assume S2LO/S2HI are endian-dependent
   shifts that move bytes towards/away from the position of the first
   string byte in a word, and that MSB masks the last string byte of a
   word -- confirm against the including file.

   The code up to the first "bx lr" handles strings that share the same
   offset from a word boundary, comparing them one aligned word at a
   time.  */
def_fn strcmp
	.cfi_sections .debug_frame
	.cfi_startproc
	/* Do both pointers have the same two low address bits?  */
	eor	tmp1, src1, src2
	tst	tmp1, #3
	/* Strings not at same byte offset from a word boundary.  */
	bne	.Lstrcmp_unaligned
	/* tmp1 = offset of the strings inside their words (0..3).  The Z
	   flag set here is consumed by the IT/LDREQ/BEQ further down; the
	   BICs and the LDR in between do not alter the flags.  */
	ands	tmp1, src1, #3
	/* Round both pointers down to a word boundary and fetch the word
	   holding the first byte of string 1.  */
	bic	src1, src1, #3
	bic	src2, src2, #3
	ldr	data1, [src1], #4
	/* Offset 0: nothing needs masking, go straight into the loop.  */
	it	eq
	ldreq	data2, [src2], #4
	beq	4f
	/* Although s1 and s2 have identical initial alignment, they are
	   not currently word aligned.  Rather than comparing bytes,
	   make sure that any bytes fetched from before the addressed
	   bytes are forced to 0xff.  Then they will always compare
	   equal.  */
	/* tmp1 becomes (3 - offset) * 8: the amount by which the mask
	   ~MSB is shifted so that only the OFFSET bytes in front of the
	   strings stay set.  The same mask is then ORed into both
	   words.  */
	eor	tmp1, tmp1, #3
	mvn	data2, #MSB
	lsl	tmp1, tmp1, #3
	S2LO	tmp1, data2, tmp1
	ldr	data2, [src2], #4
	orr	data1, data1, tmp1
	orr	data2, data2, tmp1
	.p2align	2
	/* Critical loop.  */
4:
	/* Loop while the two words are identical and data1 holds no NUL.
	   (data1 - 0x01010101) & ~data1 & 0x80808080 is non-zero when
	   data1 contains a zero byte.  TSTEQ rewrites the flags inside
	   the IT block, so the two LDREQs and the BEQ only take effect
	   when the words matched and no zero byte was flagged.  */
	sub	syndrome, data1, #0x01010101
	cmp	data1, data2
	/* check for any zero bytes in first word */
	itttt	eq
	biceq	syndrome, syndrome, data1
	tsteq	syndrome, #0x80808080
	ldreq	data1, [src1], #4
	ldreq	data2, [src2], #4
	beq	4b
2:
	/* There's a zero or a different byte in the word */
	/* Walk the word one byte at a time, starting with the first string
	   byte.  RESULT receives the current byte of data1, isolated by
	   the 24-bit shift; data1 is advanced to its next byte.  CMP #1
	   sets carry only when RESULT is non-zero, and only then is it
	   compared with the corresponding byte of data2.  data2 is
	   advanced (and the loop repeated) only while the bytes are equal
	   and non-zero, so on exit data2 still holds the byte that
	   stopped the loop.  */
	S2HI	result, data1, #24
	S2LO	data1, data1, #8
	cmp	result, #1
	it	cs
	cmpcs	result, data2, S2HI #24
	it	eq
	S2LOEQ	data2, data2, #8
	beq	2b
	/* On a big-endian machine, RESULT contains the desired byte in bits
	   0-7; on a little-endian machine they are in bits 24-31.  In
	   both cases the other bits in RESULT are all zero.  For DATA2 the
	   interesting byte is at the other end of the word, but the
	   other bits are not necessarily zero.  We need a signed result
	   representing the difference in the unsigned bytes, so for the
	   little-endian case we can't just shift the interesting bits
	   up.  */
#ifdef __ARM_BIG_ENDIAN
	sub	result, result, data2, lsr #24
#else
	/* Bring both bytes down to bits 0-7 before subtracting.  */
	and	data2, data2, #255
	lsrs	result, result, #24
	subs	result, result, data2
#endif
	bx	lr


#if 0
	/* The assembly code below is based on the following algorithm.
	   It is C-like pseudocode kept for reference only: the enclosing
	   "#if 0" means none of it is ever assembled.  s1 and s2 denote
	   the two incoming string pointers; they are not declared in this
	   fragment.  */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

/* body(shift): word-at-a-time loop for one relative alignment of the two
   strings.  SHIFT is the number of bits by which a word of string 2 is
   shifted (RSHIFT) to line up with the start of the current word of
   string 1.  Each iteration compares the part of data1 that overlaps the
   current data2, checks data1 for a zero byte, loads the next data2 and
   compares the rest of data1 against it.  Every exit is a "break" that
   leaves tmp2 (bytes of string 1) and data2 (bytes of string 2) ready
   for the final byte-by-byte loop at the end of the fragment.  */
#define body(shift)							\
  mask = 0xffffffffU RSHIFT shift;					\
  data1 = *src1++;							\
  data2 = *src2++;							\
  do									\
    {									\
      tmp2 = data1 & mask;						\
      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))		\
	{								\
	  data2 RSHIFT= shift;						\
	  break;							\
	}								\
      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))	\
	{								\
	  /* See comment in assembler below re syndrome on big-endian */\
	  if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)		\
	    data2 RSHIFT= shift;					\
	  else								\
	    {								\
	      data2 = *src2;						\
	      tmp2 = data1 RSHIFT (32 - shift);				\
	      data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
	    }								\
	  break;							\
	}								\
      data2 = *src2++;							\
      tmp2 ^= data1;							\
      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))	\
	{								\
	  tmp2 = data1 >> (32 - shift);					\
	  data2 = (data2 << (32 - shift)) RSHIFT (32 - shift);		\
	  break;							\
	}								\
      data1 = *src1++;							\
    } while (1)

  const unsigned* src1;
  const unsigned* src2;
  unsigned data1, data2;
  unsigned mask;
  unsigned shift;
  /* b1 has the value 1 in every byte; b1 << 7 has the top bit of every
     byte set.  Together they form the zero-byte test used in body().  */
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned tmp2;

  /* Compare single bytes until s1 reaches a word boundary.  */
  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
	return c1 - (int)c2;
    }
  /* Word-align both pointers and pick the loop variant from the offset
     of s2 inside its word.  In the assembly, offset 1 corresponds to
     .Loverlap3, offset 2 to .Loverlap2 and the remaining case to
     .Loverlap1.  */
  src1 = (unsigned*) (((unsigned)s1) & ~3);
  src2 = (unsigned*) (((unsigned)s2) & ~3);
  tmp2 = ((unsigned) s2) & 3;
  if (tmp2 == 1)
    {
      body(8);
    }
  else if (tmp2 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }

  /* Compare the bytes left in tmp2 and data2 one at a time (this is the
     role of .Lstrcmp_tail in the assembly).  */
  do
    {
#ifdef __ARM_BIG_ENDIAN
      c1 = (char) tmp2 >> 24;
      c2 = (char) data2 >> 24;
#else /* not  __ARM_BIG_ENDIAN */
      c1 = (char) tmp2;
      c2 = (char) data2;
#endif /* not  __ARM_BIG_ENDIAN */
      tmp2 RSHIFT= 8;
      data2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */


	/* First of all, compare bytes until src1 is word-aligned.
	   This path is entered when the two pointers have different low
	   address bits.  Each iteration compares one byte pair and the
	   function returns straight from this loop (r5 has not been
	   pushed yet) as soon as the byte of string 1 is NUL or the two
	   bytes differ.  */
.Lstrcmp_unaligned:
	tst	src1, #3
	beq	2f
	ldrb	data1, [src1], #1
	ldrb	data2, [src2], #1
	/* CMP #1 sets carry only for a non-zero byte; only then are the
	   two bytes compared.  EQ therefore means "equal and not NUL".  */
	cmp	data1, #1
	it	cs
	cmpcs	data1, data2
	beq	.Lstrcmp_unaligned
	/* Both bytes were zero-extended by LDRB, so this is the plain
	   byte difference.  */
	sub	result, data1, data2
	bx	lr

2:
	/* src1 is word aligned from here on.  tmp2 is r5: save it and
	   describe the save to the unwinder.  */
	stmfd	sp!, {r5}
	.cfi_def_cfa_offset 4
	.cfi_offset 5, -4

	/* Load the first word of each string.  src2 is rounded down to a
	   word boundary first, so data2 starts with (src2 & 3) bytes that
	   precede the current position in string 2.  */
	ldr	data1, [src1], #4
	and	tmp2, src2, #3
	bic	src2, src2, #3
	ldr	data2, [src2], #4
	/* Dispatch on the offset of src2 within its word.  It cannot be 0:
	   the pointers entered this path with different low bits, both
	   were advanced by the same number of bytes, and src1 is now
	   aligned.  2 -> .Loverlap2, 3 -> .Loverlap1, 1 -> fall through
	   into the code that follows (.Loverlap3).  */
	cmp	tmp2, #2
	beq	.Loverlap2
	bhi	.Loverlap1

	/* Critical inner Loop: Block with 3 bytes initial overlap */
	/* In: data1 = current aligned word of string 1; data2 = current
	   word of string 2, of which three bytes line up with the first
	   three bytes of data1; src1/src2 address the following words.
	   NOTE(review): S2LO, S2HI, MSB and LSB are defined outside this
	   file.  The comments assume S2LO/S2HI shift bytes towards/away
	   from the position of the first string byte in a word and that
	   LSB/MSB mask the first/last string byte -- confirm.  */
	.p2align	2
.Loverlap3:
	/* tmp2 = data1 without its last byte; compare with the three
	   overlapping bytes of data2.  The SUB/BIC that follow do not
	   touch the flags, so the BNE still tests this CMP.  */
	bic	tmp2, data1, #MSB
	cmp	tmp2, data2, S2LO #8
	sub	syndrome, data1, #0x01010101
	bic	syndrome, syndrome, data1
	bne	4f
	/* Zero byte in data1?  If not, fetch the next word of string 2;
	   if so, go and work out where the NUL is.  */
	ands	syndrome, syndrome, #0x80808080
	it	eq
	ldreq	data2, [src2], #4
	bne	5f
	/* tmp2 ^ data1 leaves only the last byte of data1; compare it
	   with the first byte of the newly loaded data2.  */
	eor	tmp2, tmp2, data1
	cmp	tmp2, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap3
4:
	/* The overlapping three bytes differ: line data2 up with tmp2 and
	   let the tail loop find the first differing or NUL byte.  */
	S2LO	data2, data2, #8
	b	.Lstrcmp_tail

5:
	/* data1 was flagged as containing a NUL, and its first three bytes
	   equal those of string 2.  If the NUL is among those three bytes
	   the strings are equal.  */
#ifdef __ARM_BIG_ENDIAN
	/* The syndrome value may contain false ones if the string ends
	with the bytes 0x01 0x00.  */
	/* So test the three bytes of data1 directly instead.  */
	tst	data1, #0xff000000
	itt	ne
	tstne	data1, #0x00ff0000
	tstne	data1, #0x0000ff00
	beq	.Lstrcmp_done_equal
#else
	/* Drop the marker belonging to the last byte (bits 24-31); any
	   marker left over means a NUL in the first three bytes.  */
	bics	syndrome, syndrome, #0xff000000
	bne	.Lstrcmp_done_equal
#endif
	/* Only the last byte of data1 is left to compare.  src2 was not
	   advanced on this path (the LDREQ above was skipped), so it
	   addresses the next byte of string 2.  */
	ldrb	data2, [src2]
	S2LO	tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
	/* Move the loaded byte into bits 24-31 to match tmp2.  */
	lsl	data2, data2, #24
#endif
	b	.Lstrcmp_tail

6:
	/* Last byte of data1 differs from the first byte of the new data2:
	   reduce both registers to that single byte for the tail loop.  */
	S2LO	tmp2, data1, #24
	and	data2, data2, #LSB
	b	.Lstrcmp_tail

	/* Critical inner Loop: Block with 2 bytes initial overlap.  */
	/* In: data1 = current aligned word of string 1; data2 = current
	   word of string 2, of which two bytes line up with the first two
	   bytes of data1; src1/src2 address the following words.
	   NOTE(review): S2LO and S2HI are defined outside this file.  The
	   comments assume they shift bytes towards/away from the position
	   of the first string byte in a word -- confirm.  */
	.p2align	2
.Loverlap2:
	/* tmp2 = first two string bytes of data1 (shifting 16 bits one way
	   and back clears the other two).  Compare them with the two
	   overlapping bytes of data2.  */
	S2HI	tmp2, data1, #16
	sub	syndrome, data1, #0x01010101
	S2LO	tmp2, tmp2, #16
	bic	syndrome, syndrome, data1
	cmp	tmp2, data2, S2LO #16
	bne	4f
	/* Zero byte in data1?  If not, fetch the next word of string 2;
	   if so, go and work out where the NUL is.  */
	ands	syndrome, syndrome, #0x80808080
	it	eq
	ldreq	data2, [src2], #4
	bne	5f
	/* tmp2 ^ data1 leaves the last two bytes of data1; compare them
	   with the first two bytes of the newly loaded data2.  */
	eor	tmp2, tmp2, data1
	cmp	tmp2, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap2

5:
	/* data1 was flagged as containing a NUL, and its first two bytes
	   equal those of string 2.  If the NUL is one of those two bytes
	   the strings are equal.  */
#ifdef __ARM_BIG_ENDIAN
	/* The syndrome value may contain false ones if the string ends
	with the bytes 0x01 0x00 */
	/* So test the two bytes of data1 directly instead.  */
	tst	data1, #0xff000000
	it	ne
	tstne	data1, #0x00ff0000
	beq	.Lstrcmp_done_equal
#else
	/* Shift out the markers of the last two bytes (bits 16-31); any
	   marker left over means a NUL in the first two bytes.  */
	lsls	syndrome, syndrome, #16
	bne	.Lstrcmp_done_equal
#endif
	/* The last two bytes of data1 remain.  src2 was not advanced on
	   this path (the LDREQ above was skipped), so the halfword at src2
	   holds the next two bytes of string 2.  */
	ldrh	data2, [src2]
	S2LO	tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
	/* Move the loaded halfword into bits 16-31 to match tmp2.  */
	lsl	data2, data2, #16
#endif
	b	.Lstrcmp_tail

6:
	/* The last two bytes of data1 differ from the first two bytes of
	   the new data2.  The shift here together with the one at 4:
	   clears everything in data2 except its first two bytes.  */
	S2HI	data2, data2, #16
	S2LO	tmp2, data1, #16
4:
	/* Also reached directly when the first two bytes of data1 differ
	   from the overlapping bytes of data2: line data2 up with tmp2.  */
	S2LO	data2, data2, #16
	b	.Lstrcmp_tail

	/* Critical inner Loop: Block with 1 byte initial overlap.  */
	/* In: data1 = current aligned word of string 1; data2 = current
	   word of string 2, of which one byte lines up with the first byte
	   of data1; src1/src2 address the following words.
	   NOTE(review): S2LO, S2HI, MSB and LSB are defined outside this
	   file.  The comments assume S2LO/S2HI shift bytes towards/away
	   from the position of the first string byte in a word and that
	   LSB/MSB mask the first/last string byte -- confirm.  */
	.p2align	2
.Loverlap1:
	/* tmp2 = first string byte of data1; compare it with the single
	   overlapping byte of data2.  The SUB/BIC that follow do not
	   touch the flags, so the BNE still tests this CMP.  */
	and	tmp2, data1, #LSB
	cmp	tmp2, data2, S2LO #24
	sub	syndrome, data1, #0x01010101
	bic	syndrome, syndrome, data1
	bne	4f
	/* Zero byte in data1?  If not, fetch the next word of string 2;
	   if so, go and work out where the NUL is.  */
	ands	syndrome, syndrome, #0x80808080
	it	eq
	ldreq	data2, [src2], #4
	bne	5f
	/* tmp2 ^ data1 leaves the last three bytes of data1; compare them
	   with the first three bytes of the newly loaded data2.  */
	eor	tmp2, tmp2, data1
	cmp	tmp2, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap1
4:
	/* The overlapping byte differs: move it to the first-byte position
	   of data2 for the tail loop.  */
	S2LO	data2, data2, #24
	b	.Lstrcmp_tail
5:
	/* The syndrome value may contain false ones if the string ends
	   with the bytes 0x01 0x00.  */
	/* data1 was flagged as containing a NUL and its first byte equals
	   that of string 2.  If that first byte is itself the NUL the
	   strings are equal; otherwise load the next word of string 2
	   (skipped above) and compare the remaining bytes.  */
	tst	data1, #LSB
	beq	.Lstrcmp_done_equal
	ldr	data2, [src2], #4
6:
	/* Hand the last three bytes of data1 and the first three bytes of
	   data2 to the tail loop.  */
	S2LO	tmp2, data1, #8
	bic	data2, data2, #MSB
	b	.Lstrcmp_tail
/* Exit used when the strings have been found equal: return 0 after
   restoring r5.  The CFI state (r5 saved, CFA offset 4) is remembered
   first so that it can be reinstated for .Lstrcmp_tail, which follows in
   the file but is entered with r5 still on the stack.  */
.Lstrcmp_done_equal:
	mov	result, #0
	.cfi_remember_state
	ldmfd	sp!, {r5}
	.cfi_restore 5
	.cfi_def_cfa_offset 0
	bx	lr

/* Final byte-by-byte comparison for the mutually-misaligned path.
   In:  tmp2  = remaining bytes of string 1,
	data2 = remaining bytes of string 2,
	each with the next byte to compare in the position selected by
	LSB; r5 is still saved on the stack.
   Out: result = (byte from tmp2) - (byte from data2) for the first pair
	that differs or whose data2 byte is zero.
   r2 (the register otherwise named data1) is reused as a temporary.
   NOTE(review): S2LOEQ and LSB are defined outside this file; S2LOEQ is
   assumed to be the conditional form of the shift that brings the next
   string byte into the LSB position -- confirm.
   NOTE(review): r2 and result are masked with LSB but never shifted down
   before the final subtraction.  If LSB selects bits 24-31 on big-endian
   builds (which the big-endian-only "lsl data2, data2, #24" fix-up
   before one of the branches to this label suggests), the value returned
   there is the byte difference scaled by 2^24 and can have the wrong
   sign when the bytes differ by 128 or more -- confirm against the
   definition of LSB.  */
.Lstrcmp_tail:
	.cfi_restore_state
	and	r2, tmp2, #LSB
	and	result, data2, #LSB
	/* CMP #1 sets carry only for a non-zero byte; only then are the
	   two bytes compared.  While they are equal and non-zero, advance
	   both registers by one byte and repeat.  */
	cmp	result, #1
	it	cs
	cmpcs	result, r2
	itt	eq
	S2LOEQ	tmp2, tmp2, #8
	S2LOEQ	data2, data2, #8
	beq	.Lstrcmp_tail
	sub	result, r2, result
	ldmfd	sp!, {r5}
	.cfi_restore 5
	.cfi_def_cfa_offset 0
	bx	lr
	.cfi_endproc
	.size strcmp, . - strcmp