Diffstat (limited to 'winsup/cygwin/math/ceil.S')
-rw-r--r--   winsup/cygwin/math/ceil.S   124
1 file changed, 124 insertions(+), 0 deletions(-)
diff --git a/winsup/cygwin/math/ceil.S b/winsup/cygwin/math/ceil.S
new file mode 100644
index 000000000..636df1e30
--- /dev/null
+++ b/winsup/cygwin/math/ceil.S
@@ -0,0 +1,124 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <_mingw_mac.h>
+
+	.file	"ceil.S"
+	.text
+	.align 4
+	.globl __MINGW_USYMBOL(ceil)
+	.def	__MINGW_USYMBOL(ceil);	.scl	2;	.type	32;	.endef
+#ifdef __x86_64__
+	.seh_proc	__MINGW_USYMBOL(ceil)
+#endif
+
+__MINGW_USYMBOL(ceil):
+#if defined(_AMD64_) || defined(__x86_64__)
+	.seh_endprologue
+	movd	%xmm0, %rax
+	movq	%rax, %rcx
+	sarq	$52, %rcx
+	andl	$2047, %ecx
+	subl	$1023, %ecx
+	cmpl	$51, %ecx
+	jg	.is_intnaninf
+	/* Is x zero? */
+	testq	%rax, %rax
+	je	.ret_org
+	/* Is x signed? */
+	testl	%ecx, %ecx
+	js	.signed_val
+	/* Is x integral? */
+	movabsq	$4503599627370495, %rdx
+	sarq	%cl, %rdx
+	testq	%rax, %rdx
+	je	.ret_org
+	addsd	.huge(%rip), %xmm0
+	ucomisd	.zero(%rip), %xmm0
+	jbe	.doret
+	testq	%rax, %rax
+	jle	.l1
+	/* inexact ... */
+	movabsq	$4503599627370496, %r8
+	shrq	%cl, %r8
+	addq	%r8, %rax
+.l1:
+	notq	%rdx
+	andq	%rdx, %rax
+.doret:
+	movd	%rax, %xmm0
+	ret
+	.p2align 4,,10
+.signed_val:
+	addsd	.huge(%rip), %xmm0
+	ucomisd	.zero(%rip), %xmm0
+	jbe	.doret2
+	testq	%rax, %rax
+	movabsq	$4607182418800017408, %rdx
+	movabsq	$-9223372036854775808, %rax
+	cmovns	%rdx, %rax
+	.p2align 4,,10
+.doret2:
+	movd	%rax, %xmm0
+	ret
+
+	.p2align 4,,10
+.is_intnaninf:
+	/* Is Nan or Inf? */
+	cmpl	$1024, %ecx
+	je	.ret_naninf
+	.p2align 4,,10
+.ret_org:
+	/* return x. */
+	rep
+	ret
+	.p2align 4,,10
+.ret_naninf:
+	/* return x + x; */
+	addsd	%xmm0, %xmm0
+	ret
+	.seh_endproc
+
+/* local data. */
+	.section .rdata,"dr"
+	.align 8
+.huge:
+	.long	-2013235812
+	.long	2117592124
+	.align 8
+.zero:
+	.long	0
+	.long	0
+#elif defined(_ARM_) || defined(__arm__)
+	vmrs	r1, fpscr
+	bic	r0, r1, #0x00c00000
+	orr	r0, r0, #0x00400000	/* Round towards Plus Infinity */
+	vmsr	fpscr, r0
+	vcvtr.s32.f64	s0, d0
+	vcvt.f64.s32	d0, s0
+	vmsr	fpscr, r1
+	bx	lr
+#elif defined(_X86_) || defined(__i386__)
+	fldl	4(%esp)
+	subl	$8,%esp
+
+	fstcw	4(%esp)			/* store fpu control word */
+
+	/* We use here %edx although only the low 1 bits are defined.
+	   But none of the operations should care and they are faster
+	   than the 16 bit operations.  */
+	movl	$0x0800,%edx		/* round towards +oo */
+	orl	4(%esp),%edx
+	andl	$0xfbff,%edx
+	movl	%edx,(%esp)
+	fldcw	(%esp)			/* load modified control word */
+
+	frndint				/* round */
+
+	fldcw	4(%esp)			/* restore original control word */
+
+	addl	$8,%esp
+	ret
+#endif
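
Note on the three branches above: the x86-64 path never touches the x87 unit; it works directly on the IEEE-754 bit pattern, extracting the unbiased exponent, returning early for zero, already-integral values, NaN and infinity, mapping inputs with |x| < 1 to 1.0 or -0.0 by sign, and otherwise clearing the fraction bits below the binary point (adding one unit at that position first when x is positive). The addsd with the .huge constant (1.0e300) exists only to raise the inexact flag, in the fdlibm style. The ARM and i386 branches instead round by temporarily switching the FPSCR rounding mode or the x87 control word to round-toward-+infinity. The following is a rough C transcription of the x86-64 logic, offered as a reading aid only; the function and variable names are mine, not part of the commit, and it is a sketch, not a drop-in replacement.

#include <stdint.h>
#include <string.h>

static const double huge = 1.0e300;            /* same role as the .huge constant */

double ceil_sketch(double x)                   /* hypothetical name, illustration only */
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);            /* movd %xmm0, %rax */
    int e = (int)((bits >> 52) & 2047) - 1023; /* unbiased exponent */

    if (e > 51) {                              /* already integral, or NaN/Inf */
        if (e == 1024)
            return x + x;                      /* NaN/Inf: return x + x */
        return x;
    }
    if (bits == 0)                             /* +0.0 */
        return x;

    if (e < 0) {                               /* 0 < |x| < 1 */
        if (huge + x > 0.0)                    /* raises the inexact flag */
            bits = (bits >> 63) ? UINT64_C(0x8000000000000000)   /* -0.0 */
                                : UINT64_C(0x3FF0000000000000);  /*  1.0 */
    } else {
        uint64_t frac = UINT64_C(0x000FFFFFFFFFFFFF) >> e;  /* fraction bits below 2^0 */
        if ((bits & frac) == 0)                /* already integral */
            return x;
        if (huge + x > 0.0) {                  /* raises the inexact flag */
            if ((int64_t)bits > 0)             /* positive: bump to the next integer */
                bits += UINT64_C(0x0010000000000000) >> e;
            bits &= ~frac;                     /* drop the fractional part */
        }
    }
    memcpy(&x, &bits, sizeof x);
    return x;
}

The constants correspond to the movabsq immediates in the diff: 0x000FFFFFFFFFFFFF = 4503599627370495, 0x0010000000000000 = 4503599627370496, 0x3FF0000000000000 = 4607182418800017408, and 0x8000000000000000 = -9223372036854775808.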