/*
** x87 math in-lines:  fast math transcendentals for i386 and amd64 
**                     architectures, Gnu C environment.
**                     These in-line functions do not do any error 
**                     checking or trapping of any kind.
** 
** Note that MinGW 3.0.0 already does in-line math for sqrt(), sin(), 
** cos(), fabs().
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 2 of the License, or any later
** version.
**
** This program is distributed in the hope that it will be useful, but
** WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** http://willus.com
**
** Some source taken from pp. 807-808 of Chapter 14 in the following
** link (Art of Assembly Language):
**
** http://webster.cs.ucr.edu/Page_asm/ArtofAssembly/pdf/0_AoAPDF.html
**
** For more on in-line assembly with Gnu C:
**
** http://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Extended-Asm.html#Extended%20Asm
**
** 2-23-04     Rev 1.01
**             pow() correctly returns zero for a first argument of zero
**             and one for 0^0.
**
** 10-8-03     Rev 1.00
**             Includes sincos(), pow(), exp(), atan2().
**
*/

#ifndef __INCLUDED_X87INLINE__

/* Possible method for setting __x87_inline_math__ */
#if (defined(__i386__) || defined(i386) || defined(__amd64) || defined(__x86_64) || defined(__x86_64__) || defined(__amd64__))
#if (!defined(__x87_inline_math__) && defined(__FAST_MATH__))
#define __x87_inline_math__
#endif
#endif

#ifdef __x87_inline_math__
/*
** Compute sine and cosine at same time (faster than separate calls).
** (*s) gets sin(x)
** (*c) gets cos(x)
*/
#define sincos(x,s,c) sincos_x87_inline(x,s,c)
void sincos_x87_inline(double x,double *s,double *c);
extern __inline__  void sincos_x87_inline(double x,double *s,double *c)
    {
    __asm__ ("fsincos;" : "=t" (*c), "=u" (*s) : "0" (x) : "st(7)");
    }
#else
#define sincos(th,x,y) { (*(x))=sin(th); (*(y))=cos(th); }
#endif


#ifdef __x87_inline_math__
/*
** in-line pow() function
** Computes x^y
*/
#define pow(x,y) pow_x87_inline(x,y)
double pow_x87_inline(double x,double y);
extern __inline__ double pow_x87_inline(double x,double y)
    {
    double result;
    short t1,t2;
    __asm__ (
             "fxch\n\t"
             "ftst\n\t"
             "fstsw\n\t"
             "and $0x40,%%ah\n\t"
             "jz 1f\n\t"
             "fstp %%st(0)\n\t"
             "ftst\n\t"
             "fstsw\n\t"
             "fstp %%st(0)\n\t"
             "and $0x40,%%ah\n\t"
             "jnz 0f\n\t"
             "fldz\n\t"
             "jmp 2f\n\t"
             "0:\n\t"
             "fld1\n\t"
             "jmp 2f\n\t"
             "1:\n\t"
             "fstcw %3\n\t"
             "fstcw %4\n\t"
             "orw $0xC00,%4\n\t"
             "fldcw %4\n\t"
             "fld1\n\t"
             "fxch\n\t"
             "fyl2x\n\t"
             "fmulp\n\t"
             "fld %%st(0)\n\t"
             "frndint\n\t"
             "fxch\n\t"
             "fsub %%st(1),%%st(0)\n\t"
             "f2xm1\n\t"
             "fld1\n\t"
             "faddp\n\t"
             "fxch\n\t"
             "fld1\n\t"
             "fscale\n\t"
             "fstp %%st(1)\n\t"
             "fmulp\n\t"
             "fldcw %3\n\t"
             "2:"
                     : "=t" (result)
                     : "0" (y), "u" (x), "m" (t1), "m" (t2)
                     : "st(1)", "st(7)", "%3", "%4", "ax" );
    return(result);
    }

/*
** in-line atan2(y,x) function.
** Computes arctan(y/x).
*/
#define atan2(y,x) atan2_x87_inline(y,x)
double atan2_x87_inline(double y,double x);
extern __inline__ double atan2_x87_inline(double y,double x)
    {
    double result;
    __asm__ ("fpatan" : "=t" (result) : "0" (x), "u" (y) : "st(1)");
    return(result);
    }

/*
** in-line exp(x) function.
** Computes e^x.
*/
#define exp(x) exp_x87_inline(x)
double exp_x87_inline(double x);
extern __inline__ double exp_x87_inline(double x)
    {
    double result;
    short t1,t2;
    __asm__ (
             "fstcw %2\n\t"
             "fstcw %3\n\t"
             "orw $0xC00,%3\n\t"
             "fldcw %3\n\t"
             "fldl2e\n\t"
             "fmulp\n\t"
             "fld %%st(0)\n\t"
             "frndint\n\t"
             "fxch\n\t"
             "fsub %%st(1),%%st(0)\n\t"
             "f2xm1\n\t"
             "fld1\n\t"
             "faddp\n\t"
             "fxch\n\t"
             "fld1\n\t"
             "fscale\n\t"
             "fstp %%st(1)\n\t"
             "fmulp\n\t"
             "fldcw %2"
                     : "=t" (result)
                     : "0" (x), "m" (t1), "m" (t2)
                     : "st(6)", "st(7)", "%2", "%3");
    return(result);
    }
#endif /* __x87_inline_math__ */

#define __INCLUDED_X87INLINE__
#endif

