mirror of
https://github.com/minetest/irrlicht.git
synced 2024-11-18 00:08:20 +01:00
3acf725ae3
- Add warnings to round_, round32 about different (and sometimes worse) behavior than the standard library, especially at high numbers. For example with input 8388609 standard library correctly returns 8388609, but we get 8388610. But as we still don't use C99/c++11 in Irrlicht 1.9 I have no easy way to fix this. - reciprocal_squareroot with IRRLICHT_FAST_MATH is less exact than it claims to be. Just commenting it. - document slightly different behavior of reciprocal with IRRLICHT_FAST_MATH around 0. - Kick out IRRLICHT_FAST_MATH for floor32, ceil32, round32. Their results are simply wrong, especially for uneven numbers. Like floor32(1) will be 0. or ceil32(-1) will also be 0. round32 wasn't as bad, but also down-rounding 0.5 and less exact. I know Burnings still wants those functions, so next commit will add them again, but with new names and only an internal header. git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@6011 dfc29bdd-3216-0410-991c-e03cc46cb475
686 lines
19 KiB
C++
686 lines
19 KiB
C++
// Copyright (C) 2002-2012 Nikolaus Gebhardt
|
|
// This file is part of the "Irrlicht Engine".
|
|
// For conditions of distribution and use, see copyright notice in irrlicht.h
|
|
|
|
#ifndef __IRR_MATH_H_INCLUDED__
|
|
#define __IRR_MATH_H_INCLUDED__
|
|
|
|
#include "IrrCompileConfig.h"
|
|
#include "irrTypes.h"
|
|
#include <math.h>
|
|
#include <float.h>
|
|
#include <stdlib.h> // for abs() etc.
|
|
#include <limits.h> // For INT_MAX / UINT_MAX
|
|
|
|
// On Solaris, Borland and Windows CE builds the single-precision C math
// functions are redirected to their double-precision counterparts; the argument
// is widened to f64 and the result is cast back to f32.
#if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)
	#define sqrtf(V) (irr::f32)sqrt((irr::f64)(V))
	#define sinf(V) (irr::f32)sin((irr::f64)(V))
	#define cosf(V) (irr::f32)cos((irr::f64)(V))
	#define asinf(V) (irr::f32)asin((irr::f64)(V))
	#define acosf(V) (irr::f32)acos((irr::f64)(V))
	#define atan2f(A,B) (irr::f32)atan2((irr::f64)(A),(irr::f64)(B))
	#define ceilf(V) (irr::f32)ceil((irr::f64)(V))
	#define floorf(V) (irr::f32)floor((irr::f64)(V))
	#define powf(A,B) (irr::f32)pow((irr::f64)(A),(irr::f64)(B))
	#define fmodf(A,B) (irr::f32)fmod((irr::f64)(A),(irr::f64)(B))
	#define fabsf(V) (irr::f32)fabs((irr::f64)(V))
	#define logf(V) (irr::f32)log((irr::f64)(V))
#endif
|
|
|
|
// Fallback definitions for the float limits in case <float.h> did not provide them.
#if !defined(FLT_MAX)
	#define FLT_MAX 3.402823466E+38F
#endif

#if !defined(FLT_MIN)
	#define FLT_MIN 1.17549435e-38F
#endif
|
|
|
|
namespace irr
|
|
{
|
|
namespace core
|
|
{
|
|
|
|
//! Rounding error constant often used when comparing f32 values.
|
|
|
|
const s32 ROUNDING_ERROR_S32 = 0;
|
|
|
|
#ifdef __IRR_HAS_S64
|
|
const s64 ROUNDING_ERROR_S64 = 0;
|
|
#endif
|
|
const f32 ROUNDING_ERROR_f32 = 0.000001f;
|
|
const f64 ROUNDING_ERROR_f64 = 0.00000001;
|
|
|
|
#ifdef PI // make sure we don't collide with a define
|
|
#undef PI
|
|
#endif
|
|
//! Constant for PI.
|
|
const f32 PI = 3.14159265359f;
|
|
|
|
//! Constant for reciprocal of PI.
|
|
const f32 RECIPROCAL_PI = 1.0f/PI;
|
|
|
|
//! Constant for half of PI.
|
|
const f32 HALF_PI = PI/2.0f;
|
|
|
|
#ifdef PI64 // make sure we don't collide with a define
|
|
#undef PI64
|
|
#endif
|
|
//! Constant for 64bit PI.
|
|
const f64 PI64 = 3.1415926535897932384626433832795028841971693993751;
|
|
|
|
//! Constant for 64bit reciprocal of PI.
|
|
const f64 RECIPROCAL_PI64 = 1.0/PI64;
|
|
|
|
//! 32bit Constant for converting from degrees to radians
|
|
const f32 DEGTORAD = PI / 180.0f;
|
|
|
|
//! 32bit constant for converting from radians to degrees (formally known as GRAD_PI)
|
|
const f32 RADTODEG = 180.0f / PI;
|
|
|
|
//! 64bit constant for converting from degrees to radians (formally known as GRAD_PI2)
|
|
const f64 DEGTORAD64 = PI64 / 180.0;
|
|
|
|
//! 64bit constant for converting from radians to degrees
|
|
const f64 RADTODEG64 = 180.0 / PI64;
|
|
|
|
//! Utility function to convert a radian value to degrees
|
|
/** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
|
|
\param radians The radians value to convert to degrees.
|
|
*/
|
|
inline f32 radToDeg(f32 radians)
|
|
{
|
|
return RADTODEG * radians;
|
|
}
|
|
|
|
//! Utility function to convert a radian value to degrees
|
|
/** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
|
|
\param radians The radians value to convert to degrees.
|
|
*/
|
|
inline f64 radToDeg(f64 radians)
|
|
{
|
|
return RADTODEG64 * radians;
|
|
}
|
|
|
|
//! Utility function to convert a degrees value to radians
|
|
/** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
|
|
\param degrees The degrees value to convert to radians.
|
|
*/
|
|
inline f32 degToRad(f32 degrees)
|
|
{
|
|
return DEGTORAD * degrees;
|
|
}
|
|
|
|
//! Utility function to convert a degrees value to radians
|
|
/** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
|
|
\param degrees The degrees value to convert to radians.
|
|
*/
|
|
inline f64 degToRad(f64 degrees)
|
|
{
|
|
return DEGTORAD64 * degrees;
|
|
}
|
|
|
|
//! Returns the smaller of two values. Own implementation to get rid of the STL (VS6 problems)
/** Only operator< is required of T. When neither value is smaller than the
other, the reference to b is returned. */
template<class T>
inline const T& min_(const T& a, const T& b)
{
	if (a < b)
		return a;
	return b;
}

//! Returns the smallest of three values. Own implementation to get rid of the STL (VS6 problems)
/** First picks between a and b, then compares that candidate with c. */
template<class T>
inline const T& min_(const T& a, const T& b, const T& c)
{
	const T& candidate = (a < b) ? a : b;
	return min_(candidate, c);
}
|
|
|
|
//! Returns the larger of two values. Own implementation to get rid of the STL (VS6 problems)
/** Only operator< is required of T. When a is not smaller than b, the
reference to a is returned. */
template<class T>
inline const T& max_(const T& a, const T& b)
{
	if (a < b)
		return b;
	return a;
}

//! Returns the largest of three values. Own implementation to get rid of the STL (VS6 problems)
/** First picks between a and b, then compares that candidate with c. */
template<class T>
inline const T& max_(const T& a, const T& b, const T& c)
{
	const T& candidate = (a < b) ? b : a;
	return max_(candidate, c);
}
|
|
|
|
//! Returns the absolute value of a. Own implementation to get rid of STL (VS6 problems)
/** T must be comparable with (T)0 via operator< and support unary minus. */
template<class T>
inline T abs_(const T& a)
{
	if (a < (T)0)
		return -a;
	return a;
}
|
|
|
|
//! returns linear interpolation of a and b with ratio t
|
|
//! \return: a if t==0, b if t==1, and the linear interpolation else
|
|
template<class T>
|
|
inline T lerp(const T& a, const T& b, const f32 t)
|
|
{
|
|
return (T)(a*(1.f-t)) + (b*t);
|
|
}
|
|
|
|
//! Clamps a value between low and high.
/** The value is first raised to at least low and then limited to at most high. */
template <class T>
inline const T clamp (const T& value, const T& low, const T& high)
{
	const T& raised = max_(value, low);
	return min_(raised, high);
}
|
|
|
|
//! Exchanges the contents of the two passed parameters.
// Note: Like boost, two template arguments are used on purpose. This avoids
// ambiguity when swapping objects of an Irrlicht type that has no swap
// overload of its own; otherwise some compilers report conflicts in
// combination with the STL.
template <class T1, class T2>
inline void swap(T1& a, T2& b)
{
	T1 saved(a);	// keep the old value of a
	a = b;
	b = saved;
}
|
|
|
|
template <class T>
|
|
inline T roundingError();
|
|
|
|
template <>
|
|
inline f32 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_f32;
|
|
}
|
|
|
|
template <>
|
|
inline f64 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_f64;
|
|
}
|
|
|
|
template <>
|
|
inline s32 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_S32;
|
|
}
|
|
|
|
template <>
|
|
inline u32 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_S32;
|
|
}
|
|
|
|
#ifdef __IRR_HAS_S64
|
|
template <>
|
|
inline s64 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_S64;
|
|
}
|
|
|
|
template <>
|
|
inline u64 roundingError()
|
|
{
|
|
return ROUNDING_ERROR_S64;
|
|
}
|
|
#endif
|
|
|
|
template <class T>
|
|
inline T relativeErrorFactor()
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
template <>
|
|
inline f32 relativeErrorFactor()
|
|
{
|
|
return 4;
|
|
}
|
|
|
|
template <>
|
|
inline f64 relativeErrorFactor()
|
|
{
|
|
return 8;
|
|
}
|
|
|
|
//! returns if a equals b, taking possible rounding errors into account
|
|
template <class T>
|
|
inline bool equals(const T a, const T b, const T tolerance = roundingError<T>())
|
|
{
|
|
return (a + tolerance >= b) && (a - tolerance <= b);
|
|
}
|
|
|
|
|
|
//! returns if a equals b, taking relative error in form of factor
|
|
//! this particular function does not involve any division.
|
|
template <class T>
|
|
inline bool equalsRelative( const T a, const T b, const T factor = relativeErrorFactor<T>())
|
|
{
|
|
//https://eagergames.wordpress.com/2017/04/01/fast-parallel-lines-and-vectors-test/
|
|
|
|
const T maxi = max_( a, b);
|
|
const T mini = min_( a, b);
|
|
const T maxMagnitude = max_( maxi, -mini);
|
|
|
|
return (maxMagnitude*factor + maxi) == (maxMagnitude*factor + mini); // MAD Wise
|
|
}
|
|
|
|
union FloatIntUnion32
|
|
{
|
|
FloatIntUnion32(float f1 = 0.0f) : f(f1) {}
|
|
// Portable sign-extraction
|
|
bool sign() const { return (i >> 31) != 0; }
|
|
|
|
irr::s32 i;
|
|
irr::f32 f;
|
|
};
|
|
|
|
//! We compare the difference in ULP's (spacing between floating-point numbers, aka ULP=1 means there exists no float between).
|
|
//\result true when numbers have a ULP <= maxUlpDiff AND have the same sign.
|
|
inline bool equalsByUlp(f32 a, f32 b, int maxUlpDiff)
|
|
{
|
|
// Based on the ideas and code from Bruce Dawson on
|
|
// http://www.altdevblogaday.com/2012/02/22/comparing-floating-point-numbers-2012-edition/
|
|
// When floats are interpreted as integers the two nearest possible float numbers differ just
|
|
// by one integer number. Also works the other way round, an integer of 1 interpreted as float
|
|
// is for example the smallest possible float number.
|
|
|
|
const FloatIntUnion32 fa(a);
|
|
const FloatIntUnion32 fb(b);
|
|
|
|
// Different signs, we could maybe get difference to 0, but so close to 0 using epsilons is better.
|
|
if ( fa.sign() != fb.sign() )
|
|
{
|
|
// Check for equality to make sure +0==-0
|
|
if (fa.i == fb.i)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
// Find the difference in ULPs.
|
|
const int ulpsDiff = abs_(fa.i- fb.i);
|
|
if (ulpsDiff <= maxUlpDiff)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
//! returns if a equals zero, taking rounding errors into account
|
|
inline bool iszero(const f64 a, const f64 tolerance = ROUNDING_ERROR_f64)
|
|
{
|
|
return fabs(a) <= tolerance;
|
|
}
|
|
|
|
//! returns if a equals zero, taking rounding errors into account
|
|
inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
|
|
{
|
|
return fabsf(a) <= tolerance;
|
|
}
|
|
|
|
//! returns if a equals not zero, taking rounding errors into account
|
|
inline bool isnotzero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
|
|
{
|
|
return fabsf(a) > tolerance;
|
|
}
|
|
|
|
//! returns if a equals zero, taking rounding errors into account
|
|
inline bool iszero(const s32 a, const s32 tolerance = 0)
|
|
{
|
|
return ( a & 0x7ffffff ) <= tolerance;
|
|
}
|
|
|
|
//! returns if a equals zero, taking rounding errors into account
|
|
inline bool iszero(const u32 a, const u32 tolerance = 0)
|
|
{
|
|
return a <= tolerance;
|
|
}
|
|
|
|
#if defined(__IRR_HAS_S64)
//! Returns whether a equals zero, taking rounding errors into account (signed 64bit).
/** \param a Value to test.
\param tolerance Largest absolute value still treated as zero.
*/
inline bool iszero(const s64 a, const s64 tolerance = 0)
{
	const s64 magnitude = abs_(a);
	return magnitude <= tolerance;
}
#endif
|
|
|
|
//! Returns the smaller of two signed 32-bit integers.
/** The former branch-free version built a mask from (a - b) >> 31. That
subtraction overflows when the difference does not fit into 32 bits (e.g.
a = INT_MAX, b = -1) and then picked the wrong operand. A plain comparison is
correct over the whole value range. */
inline s32 s32_min(s32 a, s32 b)
{
	return a < b ? a : b;
}
|
|
|
|
//! Returns the larger of two signed 32-bit integers.
/** The former branch-free version built a mask from (a - b) >> 31. That
subtraction overflows when the difference does not fit into 32 bits (e.g.
a = INT_MAX, b = -1) and then picked the wrong operand. A plain comparison is
correct over the whole value range. */
inline s32 s32_max(s32 a, s32 b)
{
	return a < b ? b : a;
}
|
|
|
|
//! Clamps value to the range [low, high] using s32_max and s32_min.
inline s32 s32_clamp (s32 value, s32 low, s32 high)
{
	const s32 raised = s32_max(value, low);
	return s32_min(raised, high);
}
|
|
|
|
/*
|
|
float IEEE-754 bit representation
|
|
|
|
0 0x00000000
|
|
1.0 0x3f800000
|
|
0.5 0x3f000000
|
|
3 0x40400000
|
|
+inf 0x7f800000
|
|
-inf 0xff800000
|
|
+NaN 0x7fc00000 or 0x7ff00000
|
|
in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)
|
|
*/
|
|
|
|
//! Union giving unsigned, signed and float views of the same 32 bits.
typedef union
{
	u32 u;
	s32 s;
	f32 f;
} inttofloat;

// Reinterpret the storage of a float lvalue as an integer (or a pointer to one).
#define F32_AS_S32(v)		(*((s32 *) &(v)))
#define F32_AS_U32(v)		(*((u32 *) &(v)))
#define F32_AS_U32_POINTER(v)	( ((u32 *) &(v)))

// Bit patterns and masks for the 32-bit float layout.
#define F32_VALUE_0		0x00000000
#define F32_VALUE_1		0x3f800000
#define F32_SIGN_BIT		0x80000000U
#define F32_EXPON_MANTISSA	0x7FFFFFFFU
|
|
|
|
//! code is taken from IceFPU
|
|
//! Integer representation of a floating-point value.
|
|
#ifdef IRRLICHT_FAST_MATH
|
|
#define IR(x) ((u32&)(x))
|
|
#else
|
|
inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}
|
|
#endif
|
|
|
|
//! Absolute integer representation of a floating-point value
|
|
#define AIR(x) (IR(x)&0x7fffffff)
|
|
|
|
//! Floating-point representation of an integer value.
|
|
#ifdef IRRLICHT_FAST_MATH
|
|
#define FR(x) ((f32&)(x))
|
|
#else
|
|
inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}
|
|
inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}
|
|
#endif
|
|
|
|
//! integer representation of 1.0
|
|
#define IEEE_1_0 0x3f800000
|
|
//! integer representation of 255.0
|
|
#define IEEE_255_0 0x437f0000
|
|
|
|
// Comparisons of an f32 against 0 and 1. With IRRLICHT_FAST_MATH they work on
// the integer bit pattern of the float, otherwise they are ordinary float
// comparisons.
#if defined(IRRLICHT_FAST_MATH)
	#define F32_LOWER_0(v)		(F32_AS_U32(v) >  F32_SIGN_BIT)
	#define F32_LOWER_EQUAL_0(v)	(F32_AS_S32(v) <= F32_VALUE_0)
	#define F32_GREATER_0(v)	(F32_AS_S32(v) >  F32_VALUE_0)
	#define F32_GREATER_EQUAL_0(v)	(F32_AS_U32(v) <= F32_SIGN_BIT)
	#define F32_EQUAL_1(v)		(F32_AS_U32(v) == F32_VALUE_1)
	#define F32_EQUAL_0(v)		( (F32_AS_U32(v) & F32_EXPON_MANTISSA ) == F32_VALUE_0)

	// only same sign
	#define F32_A_GREATER_B(a,b)	(F32_AS_S32((a)) > F32_AS_S32((b)))

#else

	#define F32_LOWER_0(v)		((v) <  0.0f)
	#define F32_LOWER_EQUAL_0(v)	((v) <= 0.0f)
	#define F32_GREATER_0(v)	((v) >  0.0f)
	#define F32_GREATER_EQUAL_0(v)	((v) >= 0.0f)
	#define F32_EQUAL_1(v)		((v) == 1.0f)
	#define F32_EQUAL_0(v)		((v) == 0.0f)
	#define F32_A_GREATER_B(a,b)	((a) > (b))
#endif
|
|
|
|
|
|
// REALINLINE: __forceinline on MSVC, plain inline everywhere else. A definition
// supplied before this header is left untouched.
#if !defined(REALINLINE)
	#if defined(_MSC_VER)
		#define REALINLINE __forceinline
	#else
		#define REALINLINE inline
	#endif
#endif
|
|
|
|
#if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
|
|
|
|
// 8-bit bools in Borland builder
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )
|
|
{
|
|
return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;
|
|
}
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )
|
|
{
|
|
return ( -condition >> 31 ) & a;
|
|
}
|
|
#else
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
|
|
{
|
|
return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;
|
|
}
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u16 if_c_a_else_b ( const s16 condition, const u16 a, const u16 b )
|
|
{
|
|
return ( ( -condition >> 15 ) & ( a ^ b ) ) ^ b;
|
|
}
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )
|
|
{
|
|
return ( -condition >> 31 ) & a;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
if (condition) state |= m; else state &= ~m;
|
|
*/
|
|
REALINLINE void setbit_cond ( u32 &state, s32 condition, u32 mask )
|
|
{
|
|
// 0, or any positive to mask
|
|
//s32 conmask = -condition >> 31;
|
|
state ^= ( ( -condition >> 31 ) ^ state ) & mask;
|
|
}
|
|
|
|
// NOTE: This is not as exact as the c99/c++11 round function, especially at high numbers starting with 8388609
|
|
// (only low number which seems to go wrong is 0.49999997 which is rounded to 1)
|
|
// Also negative 0.5 is rounded up not down unlike with the standard function (p.E. input -0.5 will be 0 and not -1)
|
|
inline f32 round_( f32 x )
|
|
{
|
|
return floorf( x + 0.5f );
|
|
}
|
|
|
|
// calculate: sqrt ( x )
|
|
REALINLINE f32 squareroot(const f32 f)
|
|
{
|
|
return sqrtf(f);
|
|
}
|
|
|
|
// calculate: sqrt ( x )
|
|
REALINLINE f64 squareroot(const f64 f)
|
|
{
|
|
return sqrt(f);
|
|
}
|
|
|
|
// calculate: sqrt ( x )
|
|
REALINLINE s32 squareroot(const s32 f)
|
|
{
|
|
return static_cast<s32>(squareroot(static_cast<f32>(f)));
|
|
}
|
|
|
|
#if defined(__IRR_HAS_S64)
//! Square root of a signed 64bit integer.
/** The value is converted to f64, the f64 overload is applied and the result
is converted back to s64. */
REALINLINE s64 squareroot(const s64 f)
{
	const f64 asFloat = static_cast<f64>(f);
	return static_cast<s64>(squareroot(asFloat));
}
#endif
|
|
|
|
// calculate: 1 / sqrt ( x )
|
|
REALINLINE f64 reciprocal_squareroot(const f64 x)
|
|
{
|
|
return 1.0 / sqrt(x);
|
|
}
|
|
|
|
// calculate: 1 / sqrtf ( x )
|
|
REALINLINE f32 reciprocal_squareroot(const f32 f)
|
|
{
|
|
#if defined ( IRRLICHT_FAST_MATH )
|
|
// NOTE: Unlike comment below says I found inaccuracies already at 4'th significant bit.
|
|
// p.E: Input 1, expected 1, got 0.999755859
|
|
|
|
#if defined(_MSC_VER) && !defined(_WIN64)
|
|
// SSE reciprocal square root estimate, accurate to 12 significant
|
|
// bits of the mantissa
|
|
f32 recsqrt;
|
|
__asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
|
|
__asm movss recsqrt, xmm0 // return xmm0
|
|
return recsqrt;
|
|
|
|
/*
|
|
// comes from Nvidia
|
|
u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
|
|
f32 y = *(f32*)&tmp;
|
|
return y * (1.47f - 0.47f * x * y * y);
|
|
*/
|
|
#else
|
|
return 1.f / sqrtf(f);
|
|
#endif
|
|
#else // no fast math
|
|
return 1.f / sqrtf(f);
|
|
#endif
|
|
}
|
|
|
|
// calculate: 1 / sqrtf( x )
|
|
REALINLINE s32 reciprocal_squareroot(const s32 x)
|
|
{
|
|
return static_cast<s32>(reciprocal_squareroot(static_cast<f32>(x)));
|
|
}
|
|
|
|
// calculate: 1 / x
|
|
REALINLINE f32 reciprocal( const f32 f )
|
|
{
|
|
#if defined (IRRLICHT_FAST_MATH)
|
|
// NOTE: Unlike with 1.f / f the values very close to 0 return -nan instead of inf
|
|
|
|
// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
|
|
// bi ts of the mantissa
|
|
// One Newton-Raphson Iteration:
|
|
// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
|
|
#if defined(_MSC_VER) && !defined(_WIN64)
|
|
f32 rec;
|
|
__asm rcpss xmm0, f // xmm0 = rcpss(f)
|
|
__asm movss xmm1, f // xmm1 = f
|
|
__asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
|
|
__asm mulss xmm1, xmm0 // xmm2 = f * rcpss(f) * rcpss(f)
|
|
__asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
|
|
__asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
|
|
// - f * rcpss(f) * rcpss(f)
|
|
__asm movss rec, xmm0 // return xmm0
|
|
return rec;
|
|
#else // no support yet for other compilers
|
|
return 1.f / f;
|
|
#endif
|
|
//! i do not divide through 0.. (fpu expection)
|
|
// instead set f to a high value to get a return value near zero..
|
|
// -1000000000000.f.. is use minus to stay negative..
|
|
// must test's here (plane.normal dot anything ) checks on <= 0.f
|
|
//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
|
|
//return 1.f / FR ( x );
|
|
|
|
#else // no fast math
|
|
return 1.f / f;
|
|
#endif
|
|
}
|
|
|
|
// calculate: 1 / x
|
|
REALINLINE f64 reciprocal ( const f64 f )
|
|
{
|
|
return 1.0 / f;
|
|
}
|
|
|
|
|
|
// calculate: 1 / x, low precision allowed
|
|
REALINLINE f32 reciprocal_approxim ( const f32 f )
|
|
{
|
|
#if defined( IRRLICHT_FAST_MATH)
|
|
|
|
// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
|
|
// bi ts of the mantissa
|
|
// One Newton-Raphson Iteration:
|
|
// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
|
|
#if defined(_MSC_VER) && !defined(_WIN64)
|
|
f32 rec;
|
|
__asm rcpss xmm0, f // xmm0 = rcpss(f)
|
|
__asm movss xmm1, f // xmm1 = f
|
|
__asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
|
|
__asm mulss xmm1, xmm0 // xmm2 = f * rcpss(f) * rcpss(f)
|
|
__asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
|
|
__asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
|
|
// - f * rcpss(f) * rcpss(f)
|
|
__asm movss rec, xmm0 // return xmm0
|
|
return rec;
|
|
#else // no support yet for other compilers
|
|
return 1.f / f;
|
|
#endif
|
|
|
|
/*
|
|
// SSE reciprocal estimate, accurate to 12 significant bits of
|
|
f32 rec;
|
|
__asm rcpss xmm0, f // xmm0 = rcpss(f)
|
|
__asm movss rec , xmm0 // return xmm0
|
|
return rec;
|
|
*/
|
|
/*
|
|
register u32 x = 0x7F000000 - IR ( p );
|
|
const f32 r = FR ( x );
|
|
return r * (2.0f - p * r);
|
|
*/
|
|
#else // no fast math
|
|
return 1.f / f;
|
|
#endif
|
|
}
|
|
|
|
|
|
//! Returns floorf(x) converted to a signed 32bit integer.
REALINLINE s32 floor32(f32 x)
{
	return static_cast<s32>(floorf(x));
}
|
|
|
|
//! Returns ceilf(x) converted to a signed 32bit integer.
REALINLINE s32 ceil32 ( f32 x )
{
	return static_cast<s32>(ceilf(x));
}
|
|
|
|
// NOTE: Please check round_ documentation about some inaccuracies in this compared to standard library round function.
|
|
REALINLINE s32 round32(f32 x)
|
|
{
|
|
return (s32) round_(x);
|
|
}
|
|
|
|
//! Returns the largest of three f32 values.
inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
{
	if (a > b)
		return a > c ? a : c;
	return b > c ? b : c;
}
|
|
|
|
//! Returns the smallest of three f32 values.
inline f32 f32_min3(const f32 a, const f32 b, const f32 c)
{
	if (a < b)
		return a < c ? a : c;
	return b < c ? b : c;
}
|
|
|
|
//! Returns x minus floorf(x), i.e. the fractional part measured from the floor.
inline f32 fract ( f32 x )
{
	const f32 whole = floorf ( x );
	return x - whole;
}
|
|
|
|
} // end namespace core
|
|
} // end namespace irr
|
|
|
|
#ifndef IRRLICHT_FAST_MATH
|
|
using irr::core::IR;
|
|
using irr::core::FR;
|
|
#endif
|
|
|
|
#endif
|