thug/Code/Core/Math/Xbox/sse.h
2016-02-14 08:39:12 +11:00

159 lines
6.7 KiB
C

//-----------------------------------------------------------------------------
// File: SSE.h
//
// Desc: P3 SSE conversions and estimates
//
// Hist: 1.7.03 - Created
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//-----------------------------------------------------------------------------
#ifndef P3_SSE
#define P3_SSE
//-----------------------------------------------------------------------------
// Name: Ftoi_ASM
// Desc: SSE float to int conversion. Note that no control word needs to be
// set to round down
//-----------------------------------------------------------------------------
__forceinline int Ftoi_ASM( const float f )
{
__asm cvttss2si eax, f // return int(f)
}
//-----------------------------------------------------------------------------
// Name: ReciprocalEstimate_ASM
// Desc: SSE reciprocal estimate, accurate to 12 significant bits of
// the mantissa
//-----------------------------------------------------------------------------
__forceinline float ReciprocalEstimate_ASM( const float f )
{
float rec;
__asm rcpss xmm0, f // xmm0 = rcpss(f)
__asm movss rec , xmm0 // return xmm0
return rec;
}
//-----------------------------------------------------------------------------
// Name: ReciprocalSqrtEstimate_ASM
// Desc: SSE reciprocal square root estimate, accurate to 12 significant
// bits of the mantissa
//-----------------------------------------------------------------------------
__forceinline float ReciprocalSqrtEstimate_ASM( const float f )
{
float recsqrt;
__asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
__asm movss recsqrt, xmm0 // return xmm0
return recsqrt;
}
//-----------------------------------------------------------------------------
// Name: SqrtEstimae_ASM
// Desc: SSE square root estimate, accurate to 12 significant bits of
// the mantissa. Note that a check for zero must be made since
// sqrt(0) == 0 but 1/sqrt(0) = inf
//-----------------------------------------------------------------------------
__forceinline float SqrtEstimate_ASM( const float f )
{
float recsqrt;
__asm movss xmm0,f // xmm0 = f
__asm rsqrtss xmm1, xmm0 // xmm1 = rsqrtss(f)
__asm mulss xmm1, xmm0 // xmm1 = rsqrtss(f) * f = sqrt(f)
__asm xorps xmm2, xmm2 // xmm2 = 0
__asm cmpneqss xmm2, xmm0 // xmm2 = (f != 0 ? 1s : 0s)
__asm andps xmm1, xmm2 // xmm1 = xmm1 & xmm2
__asm movss recsqrt, xmm1 // return xmm1
return recsqrt;
}
//-----------------------------------------------------------------------------
// Name: ReciprocalEstimateNR_ASM
// Desc: SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
// bits of the mantissa
// One Newtown-Raphson Iteration:
// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
//-----------------------------------------------------------------------------
__forceinline float ReciprocalEstimateNR_ASM( const float f )
{
float rec;
__asm rcpss xmm0, f // xmm0 = rcpss(f)
__asm movss xmm1, f // xmm1 = f
__asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
__asm mulss xmm1, xmm0 // xmm2 = f * rcpss(f) * rcpss(f)
__asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
__asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
// - f * rcpss(f) * rcpss(f)
__asm movss rec, xmm0 // return xmm0
return rec;
}
//-----------------------------------------------------------------------------
// Newton-Raphson square root iteration constants.
//   [0] = 0.5f, [1] = 3.0f
// The inline assembly in this header addresses these by byte offset
// (g_SqrtNRConst and g_SqrtNRConst+4), so the element type and order must
// not change.
// Declared static so that every translation unit including this header gets
// its own private copy instead of a second external definition.
//-----------------------------------------------------------------------------
static const float g_SqrtNRConst[2] = {0.5f, 3.0f};
//-----------------------------------------------------------------------------
// Name: ReciprocalSqrtEstimateNR_ASM
// Desc: SSE Newton-Raphson reciprocal square root estimate, accurate to 23
// significant bits of the mantissa
// One Newtown-Raphson Iteration:
// f(i+1) = 0.5 * rsqrtss(f) * (3.0 - (f * rsqrtss(f) * rsqrtss(f))
// NOTE: rsqrtss(f) * rsqrtss(f) != rcpss(f) (presision is not maintained)
//-----------------------------------------------------------------------------
__forceinline float ReciprocalSqrtEstimateNR_ASM( const float f )
{
float recsqrt;
__asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
__asm movss xmm1, f // xmm1 = f
__asm mulss xmm1, xmm0 // xmm1 = f * rsqrtss(f)
__asm movss xmm2, g_SqrtNRConst+4 // xmm2 = 3.0f
__asm mulss xmm1, xmm0 // xmm1 = f * rsqrtss(f) * rsqrtss(f)
__asm mulss xmm0, g_SqrtNRConst // xmm0 = 0.5f * rsqrtss(f)
__asm subss xmm2, xmm1 // xmm2 = 3.0f -
// f * rsqrtss(f) * rsqrtss(f)
__asm mulss xmm0, xmm2 // xmm0 = 0.5 * rsqrtss(f)
// * (3.0 - (f * rsqrtss(f) * rsqrtss(f))
__asm movss recsqrt, xmm0 // return xmm0
return recsqrt;
}
//-----------------------------------------------------------------------------
// Name: SqrtEstimateNR_ASM
// Desc: SSE Newton-Raphson square root estimate, accurate to 23 significant
// bits of the mantissa
// NOTE: x/sqrt(x) = sqrt(x)
// One Newtown-Raphson Iteration (for 1/sqrt(x)) :
// f(i+1) = 0.5 * rsqrtss(f) * (3.0 - (f * rsqrtss(f) * rsqrtss(f))
// NOTE: rsqrtss(f) * rsqrtss(f) != rcpss(f) (presision is not maintained)
//-----------------------------------------------------------------------------
__forceinline float SqrtEstimateNR_ASM( const float f )
{
float recsqrt;
__asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
__asm movss xmm1, f // xmm1 = f
__asm mulss xmm1, xmm0 // xmm1 = f * rsqrtss(f)
__asm movss xmm2, g_SqrtNRConst+4 // xmm2 = 3.0f
__asm mulss xmm1, xmm0 // xmm1 = f * rsqrtss(f) * rsqrtss(f)
__asm mulss xmm0, g_SqrtNRConst // xmm0 = 0.5f * rsqrtss(f)
__asm subss xmm2, xmm1 // xmm2 = 3.0f -
// f * rsqrtss(f) * rsqrtss(f)
__asm mulss xmm0, xmm2 // xmm0 = 0.5 * rsqrtss(f)
// * (3.0 - (f * rsqrtss(f) * rsqrtss(f))
__asm xorps xmm1, xmm1 // xmm1 = 0
__asm mulss xmm0, f // xmm0 = sqrt(f)
__asm cmpneqss xmm1, f // xmm1 = (f != 0 ? 1s : 0s)
__asm andps xmm0, xmm1 // xmm0 = xmm1 & xmm2
__asm movss recsqrt, xmm0 // return xmm0
return recsqrt;
}
#endif // P3_SSE