mirror of
https://github.com/minetest/irrlicht.git
synced 2024-11-03 00:40:41 +01:00
3280b3319e
To avoid changing burnings now those functions have no IRRLICHT_FAST_MATH anymore, there's a new header irrMathFastCompat.h which has ..._fast functions doing the old behavior. With the troubles they have documented. I changed burnings to use those functions throughout. Or as much as possible... Burnings probably also uses classes like SColor which also have functions using those, but I don't plan to adapt them. Maybe IRRLICHT_FAST_MATH should be a flag exclusive to burnings in the future, I don't think it makes much sense otherwise anymore (it often expects 32-bit asm). git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@6012 dfc29bdd-3216-0410-991c-e03cc46cb475
1096 lines
26 KiB
C++
1096 lines
26 KiB
C++
// Copyright (C) 2002-2012 Nikolaus Gebhardt / Thomas Alten
|
|
// This file is part of the "Irrlicht Engine".
|
|
// For conditions of distribution and use, see copyright notice in irrlicht.h
|
|
|
|
/*
|
|
History:
|
|
- changed behavior for log2 textures ( replaced multiplies by shift )
|
|
*/
|
|
|
|
#ifndef __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
|
|
#define __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
|
|
|
|
#include "SoftwareDriver2_compile_config.h"
|
|
#include "irrMath.h"
|
|
#include "irrMathFastCompat.h"
|
|
#include "CSoftwareTexture2.h"
|
|
#include "SMaterial.h"
|
|
|
|
|
|
namespace irr
|
|
{
|
|
|
|
// supporting different packed pixel needs many defines...
|
|
|
|
#ifdef SOFTWARE_DRIVER_2_32BIT
|
|
typedef u32 tVideoSample;
|
|
|
|
#define MASK_A 0xFF000000
|
|
#define MASK_R 0x00FF0000
|
|
#define MASK_G 0x0000FF00
|
|
#define MASK_B 0x000000FF
|
|
|
|
#define SHIFT_A 24
|
|
#define SHIFT_R 16
|
|
#define SHIFT_G 8
|
|
#define SHIFT_B 0
|
|
|
|
#define COLOR_MAX 0xFF
|
|
#define COLOR_MAX_LOG2 8
|
|
#define COLOR_BRIGHT_WHITE 0xFFFFFFFF
|
|
|
|
#define VIDEO_SAMPLE_GRANULARITY 2
|
|
|
|
#else
|
|
typedef u16 tVideoSample;
|
|
|
|
#define MASK_A 0x8000
|
|
#define MASK_R 0x7C00
|
|
#define MASK_G 0x03E0
|
|
#define MASK_B 0x001F
|
|
|
|
#define SHIFT_A 15
|
|
#define SHIFT_R 10
|
|
#define SHIFT_G 5
|
|
#define SHIFT_B 0
|
|
|
|
#define COLOR_MAX 0x1F
|
|
#define COLOR_MAX_LOG2 5
|
|
#define COLOR_BRIGHT_WHITE 0xFFFF
|
|
#define VIDEO_SAMPLE_GRANULARITY 1
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// ----------------------- Generic ----------------------------------
|
|
|
|
//! a more useful memset for pixel
|
|
// (standard memset only works with 8-bit values)
|
|
inline void memset32(void * dest, const u32 value, u32 bytesize)
|
|
{
|
|
u32 * d = (u32*) dest;
|
|
|
|
u32 i;
|
|
|
|
// loops unrolled to reduce the number of increments by factor ~8.
|
|
i = bytesize >> (2 + 3);
|
|
while (i)
|
|
{
|
|
d[0] = value;
|
|
d[1] = value;
|
|
d[2] = value;
|
|
d[3] = value;
|
|
|
|
d[4] = value;
|
|
d[5] = value;
|
|
d[6] = value;
|
|
d[7] = value;
|
|
|
|
d += 8;
|
|
i -= 1;
|
|
}
|
|
|
|
i = (bytesize >> 2 ) & 7;
|
|
while (i)
|
|
{
|
|
d[0] = value;
|
|
d += 1;
|
|
i -= 1;
|
|
}
|
|
}
|
|
|
|
//! a more useful memset for pixel
|
|
// (standard memset only works with 8-bit values)
|
|
inline void memset16(void * dest, const u16 value, u32 bytesize)
|
|
{
|
|
u16 * d = (u16*) dest;
|
|
|
|
u32 i;
|
|
|
|
// loops unrolled to reduce the number of increments by factor ~8.
|
|
i = bytesize >> (1 + 3);
|
|
while (i)
|
|
{
|
|
d[0] = value;
|
|
d[1] = value;
|
|
d[2] = value;
|
|
d[3] = value;
|
|
|
|
d[4] = value;
|
|
d[5] = value;
|
|
d[6] = value;
|
|
d[7] = value;
|
|
|
|
d += 8;
|
|
--i;
|
|
}
|
|
|
|
i = (bytesize >> 1 ) & 7;
|
|
while (i)
|
|
{
|
|
d[0] = value;
|
|
++d;
|
|
--i;
|
|
}
|
|
}
|
|
|
|
/*
|
|
use biased loop counter
|
|
--> 0 byte copy is forbidden
|
|
*/
|
|
REALINLINE void memcpy32_small ( void * dest, const void *source, u32 bytesize )
|
|
{
|
|
u32 c = bytesize >> 2;
|
|
|
|
do
|
|
{
|
|
((u32*) dest ) [ c-1 ] = ((u32*) source) [ c-1 ];
|
|
} while ( --c );
|
|
|
|
}
|
|
|
|
|
|
|
|
// integer log2 of a float ieee 754. TODO: non ieee floating point
|
|
static inline s32 s32_log2_f32( f32 f)
|
|
{
|
|
u32 x = IR ( f );
|
|
return ((x & 0x7F800000) >> 23) - 127;
|
|
}
|
|
|
|
// Integer log2 of an unsigned integer: x is converted to f32 and the
// exponent of that float is returned (see s32_log2_f32).
static inline s32 s32_log2_s32(u32 x)
{
	const f32 asFloat = (f32) x;
	return s32_log2_f32( asFloat );
}
|
|
|
|
// Absolute value without a branch.
static inline s32 s32_abs(s32 x)
{
	// all ones for negative x, zero otherwise (relies on >> being an arithmetic shift)
	const s32 sign = x >> 31;

	// negative x: invert all bits and add one; otherwise x passes through unchanged
	const s32 flipped = x ^ sign;
	return flipped - sign;
}
|
|
|
|
|
|
//! conditional set based on mask and arithmetic shift
|
|
REALINLINE u32 if_mask_a_else_b ( const u32 mask, const u32 a, const u32 b )
|
|
{
|
|
return ( mask & ( a ^ b ) ) ^ b;
|
|
}
|
|
|
|
// ------------------ Video---------------------------------------
|
|
/*!
|
|
Pixel = dest * ( 1 - alpha ) + source * alpha
|
|
alpha [0;256]
|
|
*/
|
|
REALINLINE u32 PixelBlend32 ( const u32 c2, const u32 c1, u32 alpha )
|
|
{
|
|
u32 srcRB = c1 & 0x00FF00FF;
|
|
u32 srcXG = c1 & 0x0000FF00;
|
|
|
|
u32 dstRB = c2 & 0x00FF00FF;
|
|
u32 dstXG = c2 & 0x0000FF00;
|
|
|
|
|
|
u32 rb = srcRB - dstRB;
|
|
u32 xg = srcXG - dstXG;
|
|
|
|
rb *= alpha;
|
|
xg *= alpha;
|
|
rb >>= 8;
|
|
xg >>= 8;
|
|
|
|
rb += dstRB;
|
|
xg += dstXG;
|
|
|
|
rb &= 0x00FF00FF;
|
|
xg &= 0x0000FF00;
|
|
|
|
return rb | xg;
|
|
}
|
|
|
|
/*!
|
|
Pixel = dest * ( 1 - alpha ) + source * alpha
|
|
alpha [0;32]
|
|
*/
|
|
inline u16 PixelBlend16 ( const u16 c2, const u32 c1, const u16 alpha )
|
|
{
|
|
const u16 srcRB = c1 & 0x7C1F;
|
|
const u16 srcXG = c1 & 0x03E0;
|
|
|
|
const u16 dstRB = c2 & 0x7C1F;
|
|
const u16 dstXG = c2 & 0x03E0;
|
|
|
|
u32 rb = srcRB - dstRB;
|
|
u32 xg = srcXG - dstXG;
|
|
|
|
rb *= alpha;
|
|
xg *= alpha;
|
|
rb >>= 5;
|
|
xg >>= 5;
|
|
|
|
rb += dstRB;
|
|
xg += dstXG;
|
|
|
|
rb &= 0x7C1F;
|
|
xg &= 0x03E0;
|
|
|
|
return (u16)(rb | xg);
|
|
}
|
|
|
|
/*
|
|
Pixel = c0 * (c1/31). c0 Alpha retain
|
|
*/
|
|
inline u16 PixelMul16 ( const u16 c0, const u16 c1)
|
|
{
|
|
return (u16)((( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 ) |
|
|
(( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 ) |
|
|
(( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5 ) |
|
|
(c0 & 0x8000));
|
|
}
|
|
|
|
/*
|
|
Pixel = c0 * (c1/31).
|
|
*/
|
|
inline u16 PixelMul16_2 ( u16 c0, u16 c1)
|
|
{
|
|
return (u16)(( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 |
|
|
( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 |
|
|
( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5 |
|
|
( c0 & c1 & 0x8000));
|
|
}
|
|
|
|
/*
|
|
Pixel = c0 * (c1/255). c0 Alpha Retain
|
|
*/
|
|
REALINLINE u32 PixelMul32 ( const u32 c0, const u32 c1)
|
|
{
|
|
return (c0 & 0xFF000000) |
|
|
(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
|
|
(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
|
|
(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8 ) & 0x000000FF);
|
|
}
|
|
|
|
/*
|
|
Pixel = c0 * (c1/255).
|
|
*/
|
|
REALINLINE u32 PixelMul32_2 ( const u32 c0, const u32 c1)
|
|
{
|
|
return (( ( (c0 & 0xFF000000) >> 16 ) * ( (c1 & 0xFF000000) >> 16 ) ) & 0xFF000000 ) |
|
|
(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
|
|
(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
|
|
(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8 ) & 0x000000FF);
|
|
}
|
|
|
|
/*
|
|
Pixel = clamp ( c0 + c1, 0, 255 )
|
|
*/
|
|
REALINLINE u32 PixelAdd32 ( const u32 c2, const u32 c1)
|
|
{
|
|
u32 sum = ( c2 & 0x00FFFFFF ) + ( c1 & 0x00FFFFFF );
|
|
u32 low_bits = ( c2 ^ c1 ) & 0x00010101;
|
|
s32 carries = ( sum - low_bits ) & 0x01010100;
|
|
u32 modulo = sum - carries;
|
|
u32 clamp = carries - ( carries >> 8 );
|
|
return modulo | clamp;
|
|
}
|
|
|
|
#if 0
|
|
|
|
// 1 - Bit Alpha Blending
|
|
inline u16 PixelBlend16 ( const u16 destination, const u16 source )
|
|
{
|
|
if((source & 0x8000) == 0x8000)
|
|
return source; // The source is visible, so use it.
|
|
else
|
|
return destination; // The source is transparent, so use the destination.
|
|
}
|
|
|
|
// 1 - Bit Alpha Blending 16Bit SIMD
|
|
inline u32 PixelBlend16_simd ( const u32 destination, const u32 source )
|
|
{
|
|
switch(source & 0x80008000)
|
|
{
|
|
case 0x80008000: // Both source pixels are visible
|
|
return source;
|
|
|
|
case 0x80000000: // Only the first source pixel is visible
|
|
return (source & 0xFFFF0000) | (destination & 0x0000FFFF);
|
|
|
|
case 0x00008000: // Only the second source pixel is visible.
|
|
return (destination & 0xFFFF0000) | (source & 0x0000FFFF);
|
|
|
|
default: // Neither source pixel is visible.
|
|
return destination;
|
|
}
|
|
}
|
|
#else
|
|
|
|
// 1 - Bit Alpha Blending
|
|
inline u16 PixelBlend16 ( const u16 c2, const u16 c1 )
|
|
{
|
|
u16 mask = ((c1 & 0x8000) >> 15 ) + 0x7fff;
|
|
return (c2 & mask ) | ( c1 & ~mask );
|
|
}
|
|
|
|
// 1 - Bit Alpha Blending 16Bit SIMD
|
|
inline u32 PixelBlend16_simd ( const u32 c2, const u32 c1 )
|
|
{
|
|
u32 mask = ((c1 & 0x80008000) >> 15 ) + 0x7fff7fff;
|
|
return (c2 & mask ) | ( c1 & ~mask );
|
|
}
|
|
|
|
#endif
|
|
|
|
/*!
|
|
Pixel = dest * ( 1 - SourceAlpha ) + source * SourceAlpha (OpenGL blending)
|
|
*/
|
|
inline u32 PixelBlend32 ( const u32 c2, const u32 c1 )
|
|
{
|
|
// alpha test
|
|
u32 alpha = c1 & 0xFF000000;
|
|
|
|
if ( 0 == alpha )
|
|
return c2;
|
|
if ( 0xFF000000 == alpha )
|
|
{
|
|
return c1;
|
|
}
|
|
|
|
alpha >>= 24;
|
|
|
|
// add highbit alpha, if ( alpha > 127 ) alpha += 1;
|
|
alpha += ( alpha >> 7);
|
|
|
|
u32 srcRB = c1 & 0x00FF00FF;
|
|
u32 srcXG = c1 & 0x0000FF00;
|
|
|
|
u32 dstRB = c2 & 0x00FF00FF;
|
|
u32 dstXG = c2 & 0x0000FF00;
|
|
|
|
|
|
u32 rb = srcRB - dstRB;
|
|
u32 xg = srcXG - dstXG;
|
|
|
|
rb *= alpha;
|
|
xg *= alpha;
|
|
rb >>= 8;
|
|
xg >>= 8;
|
|
|
|
rb += dstRB;
|
|
xg += dstXG;
|
|
|
|
rb &= 0x00FF00FF;
|
|
xg &= 0x0000FF00;
|
|
|
|
return (c1 & 0xFF000000) | rb | xg;
|
|
}
|
|
|
|
/*!
|
|
Pixel =>
|
|
color = sourceAlpha > 0 ? source, else dest
|
|
alpha = max(destAlpha, sourceAlpha)
|
|
*/
|
|
inline u16 PixelCombine16 ( const u16 c2, const u16 c1 )
|
|
{
|
|
if ( video::getAlpha(c1) > 0 )
|
|
return c1;
|
|
else
|
|
return c2;
|
|
}
|
|
|
|
/*!
|
|
Pixel =>
|
|
color = dest * ( 1 - SourceAlpha ) + source * SourceAlpha,
|
|
alpha = destAlpha * ( 1 - SourceAlpha ) + sourceAlpha
|
|
|
|
where "1" means "full scale" (255)
|
|
*/
|
|
inline u32 PixelCombine32 ( const u32 c2, const u32 c1 )
|
|
{
|
|
// alpha test
|
|
u32 alpha = c1 & 0xFF000000;
|
|
|
|
if ( 0 == alpha )
|
|
return c2;
|
|
if ( 0xFF000000 == alpha )
|
|
{
|
|
return c1;
|
|
}
|
|
|
|
alpha >>= 24;
|
|
|
|
// add highbit alpha, if ( alpha > 127 ) alpha += 1;
|
|
// stretches [0;255] to [0;256] to avoid division by 255. use division 256 == shr 8
|
|
alpha += ( alpha >> 7);
|
|
|
|
u32 srcRB = c1 & 0x00FF00FF;
|
|
u32 srcXG = c1 & 0x0000FF00;
|
|
|
|
u32 dstRB = c2 & 0x00FF00FF;
|
|
u32 dstXG = c2 & 0x0000FF00;
|
|
|
|
|
|
u32 rb = srcRB - dstRB;
|
|
u32 xg = srcXG - dstXG;
|
|
|
|
rb *= alpha;
|
|
xg *= alpha;
|
|
rb >>= 8;
|
|
xg >>= 8;
|
|
|
|
rb += dstRB;
|
|
xg += dstXG;
|
|
|
|
rb &= 0x00FF00FF;
|
|
xg &= 0x0000FF00;
|
|
|
|
u32 sa = c1 >> 24;
|
|
u32 da = c2 >> 24;
|
|
u32 blendAlpha_fix8 = (sa*256 + da*(256-alpha))>>8;
|
|
return blendAlpha_fix8 << 24 | rb | xg;
|
|
}
|
|
|
|
|
|
|
|
// ------------------ Fix Point ----------------------------------
|
|
|
|
typedef s32 tFixPoint;
|
|
typedef u32 tFixPointu;
|
|
|
|
// Fix Point 12
|
|
#if 0
|
|
#define FIX_POINT_PRE 12
|
|
#define FIX_POINT_FRACT_MASK 0xFFF
|
|
#define FIX_POINT_SIGNED_MASK 0xFFFFF000
|
|
#define FIX_POINT_UNSIGNED_MASK 0x7FFFF000
|
|
#define FIX_POINT_ONE 0x1000
|
|
#define FIX_POINT_ZERO_DOT_FIVE 0x0800
|
|
#define FIX_POINT_F32_MUL 4096.f
|
|
#endif
|
|
|
|
// Fix Point 10: 10 fractional bits (this is the active configuration)
#if 1
	// number of fractional bits
	#define FIX_POINT_PRE			10
	// selects the fractional bits
	#define FIX_POINT_FRACT_MASK	0x3FF
	// selects everything above the fractional bits, sign bit included
	#define FIX_POINT_SIGNED_MASK	0xFFFFFC00
	// same as the signed mask without the sign bit. It must not overlap
	// FIX_POINT_FRACT_MASK (the previous value 0x7FFFFE00 belonged to the
	// 9 bit configuration and covered fractional bit 9).
	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFC00
	// 1.0 and 0.5 in fix point
	#define FIX_POINT_ONE			0x400
	#define FIX_POINT_ZERO_DOT_FIVE	0x200
	// float scale factor: 2 ^ FIX_POINT_PRE
	#define FIX_POINT_F32_MUL		1024.f
#endif
|
|
|
|
// Fix Point 9
|
|
#if 0
|
|
#define FIX_POINT_PRE 9
|
|
#define FIX_POINT_FRACT_MASK 0x1FF
|
|
#define FIX_POINT_SIGNED_MASK 0xFFFFFE00
|
|
#define FIX_POINT_UNSIGNED_MASK 0x7FFFFE00
|
|
#define FIX_POINT_ONE 0x200
|
|
#define FIX_POINT_ZERO_DOT_FIVE 0x100
|
|
#define FIX_POINT_F32_MUL 512.f
|
|
#endif
|
|
|
|
// Fix Point 7
|
|
#if 0
|
|
#define FIX_POINT_PRE 7
|
|
#define FIX_POINT_FRACT_MASK 0x7F
|
|
#define FIX_POINT_SIGNED_MASK 0xFFFFFF80
|
|
#define FIX_POINT_UNSIGNED_MASK 0x7FFFFF80
|
|
#define FIX_POINT_ONE 0x80
|
|
#define FIX_POINT_ZERO_DOT_FIVE 0x40
|
|
#define FIX_POINT_F32_MUL 128.f
|
|
#endif
|
|
|
|
#define FIXPOINT_COLOR_MAX ( COLOR_MAX << FIX_POINT_PRE )
|
|
#define FIX_POINT_HALF_COLOR ( (tFixPoint) ( ((f32) COLOR_MAX / 2.f * FIX_POINT_F32_MUL ) ) )
|
|
|
|
|
|
/*
|
|
convert signed integer to fixpoint
|
|
*/
|
|
inline tFixPoint s32_to_fixPoint (const s32 x)
|
|
{
|
|
return x << FIX_POINT_PRE;
|
|
}
|
|
|
|
/*
	convert unsigned integer to unsigned fixpoint:
	the value is moved up by the FIX_POINT_PRE fractional bits
*/
inline tFixPointu u32_to_fixPoint (const u32 x)
{
	return ( x << FIX_POINT_PRE );
}
|
|
|
|
/*
	convert unsigned fixpoint to unsigned integer:
	the FIX_POINT_PRE fractional bits are dropped (truncation)
*/
inline u32 fixPointu_to_u32 (const tFixPointu x)
{
	return ( x >> FIX_POINT_PRE );
}
|
|
|
|
|
|
// 1/x * FIX_POINT
|
|
#define fix_inverse32(x) (FIX_POINT_F32_MUL / (x))
|
|
|
|
|
|
/*
|
|
convert float to fixpoint
|
|
fast convert (fistp on x86) HAS to be used..
|
|
hints: compileflag /QIfist for msvc7. msvc 8.0 has smth different
|
|
others should use their favourite assembler..
|
|
*/
|
|
// Float to int conversion through the bit pattern of the float.
// 3<<22 equals 1.5 * 2^23 and 0x4b400000 is the IEEE 754 single precision
// bit pattern of that value: after the addition the integer part of f sits
// in the low mantissa bits, and subtracting the pattern leaves it as an int.
// NOTE(review): presumably only valid for |f| well below 2^22 and rounds
// according to the current FPU rounding mode - confirm before relying on it.
static inline int f_round2(f32 f)
{
	const f32 biased = f + (3<<22);
	return IR(biased) - 0x4b400000;
}
|
|
|
|
/*
|
|
convert f32 to Fix Point.
|
|
multiply is needed anyway, so scale mulby
|
|
*/
|
|
REALINLINE tFixPoint tofix0 (const f32 x, const f32 mulby = FIX_POINT_F32_MUL )
|
|
{
|
|
return (tFixPoint) (x * mulby);
|
|
}
|
|
// Macro form of tofix0: multiply x by the scale y and cast the product to tFixPoint.
// Both arguments and the whole expansion are parenthesized, so expression
// arguments such as tofix(a + b, s) are scaled as a whole and the result
// can be used inside any larger expression.
#define tofix(x,y) ((tFixPoint)((x) * (y)))
|
|
|
|
/*
|
|
Fix Point , Fix Point Multiply
|
|
*/
|
|
REALINLINE tFixPointu imulFixu(const tFixPointu x, const tFixPointu y)
|
|
{
|
|
return (x * y) >> (tFixPointu) FIX_POINT_PRE;
|
|
}
|
|
|
|
/*
|
|
Fix Point , Fix Point Multiply
|
|
*/
|
|
REALINLINE tFixPoint imulFix(const tFixPoint x, const tFixPoint y)
|
|
{
|
|
return ( x * y) >> ( FIX_POINT_PRE );
|
|
}
|
|
|
|
/*
|
|
Fix Point , Fix Point Multiply x * y * 2
|
|
*/
|
|
REALINLINE tFixPoint imulFix2(const tFixPoint x, const tFixPoint y)
|
|
{
|
|
return ( x * y) >> ( FIX_POINT_PRE -1 );
|
|
}
|
|
|
|
|
|
/*
|
|
Multiply x * y * 1
|
|
*/
|
|
REALINLINE tFixPoint imulFix_tex1(const tFixPoint x, const tFixPoint y)
|
|
{
|
|
return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 4 );
|
|
}
|
|
|
|
/*
|
|
Multiply x * y * 2
|
|
*/
|
|
REALINLINE tFixPoint imulFix_tex2(const tFixPoint x, const tFixPoint y)
|
|
{
|
|
return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 3 );
|
|
}
|
|
|
|
/*
|
|
Multiply x * y * 4
|
|
*/
|
|
REALINLINE tFixPoint imulFix_tex4(const tFixPoint x, const tFixPoint y)
|
|
{
|
|
#ifdef SOFTWARE_DRIVER_2_32BIT
|
|
return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 2 );
|
|
#else
|
|
return ( x * y) >> ( FIX_POINT_PRE + ( VIDEO_SAMPLE_GRANULARITY * 3 ) );
|
|
#endif
|
|
}
|
|
|
|
/*!
|
|
clamp FixPoint to maxcolor in FixPoint, min(a,31)
|
|
*/
|
|
REALINLINE tFixPoint clampfix_maxcolor ( const tFixPoint a)
|
|
{
|
|
tFixPoint c = (a - FIXPOINT_COLOR_MAX) >> 31;
|
|
return (a & c) | ( FIXPOINT_COLOR_MAX & ~c);
|
|
}
|
|
|
|
/*!
|
|
clamp FixPoint to 0 in FixPoint, max(a,0)
|
|
*/
|
|
REALINLINE tFixPoint clampfix_mincolor ( const tFixPoint a)
|
|
{
|
|
return a - ( a & ( a >> 31 ) );
|
|
}
|
|
|
|
/*!
	clamp FixPoint to the color range: first limited to FIXPOINT_COLOR_MAX
	from above (clampfix_maxcolor), then to 0 from below (clampfix_mincolor)
*/
REALINLINE tFixPoint saturateFix ( const tFixPoint a)
{
	const tFixPoint upperBounded = clampfix_maxcolor ( a );
	return clampfix_mincolor ( upperBounded );
}
|
|
|
|
|
|
// rount fixpoint to int
|
|
inline s32 roundFix ( const tFixPoint x )
|
|
{
|
|
return ( x + FIX_POINT_ZERO_DOT_FIVE ) >> FIX_POINT_PRE;
|
|
}
|
|
|
|
|
|
|
|
// x in [0;1[
|
|
inline s32 f32_to_23Bits(const f32 x)
|
|
{
|
|
f32 y = x + 1.f;
|
|
return IR(y) & 0x7FFFFF; // last 23 bits
|
|
}
|
|
|
|
/*!
|
|
return VideoSample from fixpoint
|
|
*/
|
|
REALINLINE tVideoSample fix_to_color ( const tFixPoint r, const tFixPoint g, const tFixPoint b )
|
|
{
|
|
return ( FIXPOINT_COLOR_MAX & FIXPOINT_COLOR_MAX) << ( SHIFT_A - FIX_POINT_PRE ) |
|
|
( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
|
|
( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
|
|
( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
|
|
}
|
|
|
|
|
|
/*!
|
|
return VideoSample from fixpoint
|
|
*/
|
|
REALINLINE tVideoSample fix4_to_color ( const tFixPoint a, const tFixPoint r, const tFixPoint g, const tFixPoint b )
|
|
{
|
|
return ( a & (FIX_POINT_FRACT_MASK - 1 )) << ( SHIFT_A - 1 ) |
|
|
( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
|
|
( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
|
|
( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
|
|
}
|
|
|
|
/*!
|
|
return fixpoint from VideoSample granularity COLOR_MAX
|
|
*/
|
|
inline void color_to_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
|
|
{
|
|
(tFixPointu&) r = (t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
|
|
(tFixPointu&) g = (t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
|
|
(tFixPointu&) b = (t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
|
|
}
|
|
|
|
/*!
|
|
return fixpoint from VideoSample granularity COLOR_MAX
|
|
*/
|
|
inline void color_to_fix ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
|
|
{
|
|
(tFixPointu&) a = (t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE );
|
|
(tFixPointu&) r = (t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
|
|
(tFixPointu&) g = (t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
|
|
(tFixPointu&) b = (t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
|
|
}
|
|
|
|
/*!
|
|
return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
|
|
*/
|
|
inline void color_to_fix1 ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
|
|
{
|
|
(tFixPointu&) r = (t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
|
|
(tFixPointu&) g = (t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
|
|
(tFixPointu&) b = (t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
|
|
}
|
|
|
|
/*!
|
|
return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
|
|
*/
|
|
inline void color_to_fix1 ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
|
|
{
|
|
(tFixPointu&) a = (t00 & MASK_A) >> ( SHIFT_A + COLOR_MAX_LOG2 - FIX_POINT_PRE );
|
|
(tFixPointu&) r = (t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
|
|
(tFixPointu&) g = (t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
|
|
(tFixPointu&) b = (t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
|
|
}
|
|
|
|
|
|
|
|
// ----- FP24 ---- floating point z-buffer
|
|
|
|
#if 1
|
|
typedef f32 fp24;
|
|
#else
|
|
struct fp24
|
|
{
|
|
u32 v;
|
|
|
|
fp24() {}
|
|
|
|
fp24 ( const f32 f )
|
|
{
|
|
f32 y = f + 1.f;
|
|
v = ((u32&)y) & 0x7FFFFF; // last 23 bits
|
|
}
|
|
|
|
void operator=(const f32 f )
|
|
{
|
|
f32 y = f + 1.f;
|
|
v = ((u32&)y) & 0x7FFFFF; // last 23 bits
|
|
}
|
|
|
|
void operator+=(const fp24 &other )
|
|
{
|
|
v += other.v;
|
|
}
|
|
|
|
operator f32 () const
|
|
{
|
|
f32 r = FR ( v );
|
|
return r + 1.f;
|
|
}
|
|
|
|
};
|
|
#endif
|
|
|
|
|
|
// ------------------------ Internal Texture -----------------------------
|
|
|
|
struct sInternalTexture
|
|
{
|
|
u32 textureXMask;
|
|
u32 textureYMask;
|
|
|
|
u32 pitchlog2;
|
|
void *data;
|
|
|
|
video::CSoftwareTexture2 *Texture;
|
|
s32 lodLevel;
|
|
};
|
|
|
|
|
|
|
|
// get video sample plain
|
|
inline tVideoSample getTexel_plain ( const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty )
|
|
{
|
|
u32 ofs;
|
|
|
|
ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
// texel
|
|
return *((tVideoSample*)( (u8*) t->data + ofs ));
|
|
}
|
|
|
|
// get video sample to fix
|
|
inline void getTexel_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
|
|
)
|
|
{
|
|
u32 ofs;
|
|
|
|
ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
// texel
|
|
tVideoSample t00;
|
|
t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
|
|
|
|
r = (t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
|
|
g = (t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
|
|
b = (t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
|
|
|
|
}
|
|
|
|
// get video sample to fixpoint
|
|
REALINLINE void getTexel_fix ( tFixPoint &a,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty)
|
|
{
|
|
u32 ofs;
|
|
|
|
ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
// texel
|
|
tVideoSample t00;
|
|
t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
|
|
|
|
a = (t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE);
|
|
}
|
|
|
|
|
|
// get sample with ordered dither
// A fix point offset picked from a 4x4 table by the low two bits of x and y
// is added to both texture coordinates before the texel is fetched. The
// colour channels are returned as fix point values (granularity COLOR_MAX).
inline void getSample_texture_dither (	tFixPoint &r, tFixPoint &g, tFixPoint &b,
										const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty,
										const u32 x, const u32 y
								)
{
	static const tFixPointu ditherOffset[] =
	{
		0x00,0x80,0x20,0xa0,
		0xc0,0x40,0xe0,0x60,
		0x30,0xb0,0x10,0x90,
		0xf0,0x70,0xd0,0x50
	};

	// table row from y, table column from x
	const u32 cell = ( (y & 3 ) << 2 ) | (x & 3);
	const tFixPointu offset = ditherOffset [ cell ];

	const tFixPointu ditheredX = (tx + offset ) & t->textureXMask;
	const tFixPointu ditheredY = (ty + offset ) & t->textureYMask;

	// byte offset of the row and byte offset inside the row
	const u32 rowOfs = ( ditheredY >> FIX_POINT_PRE ) << t->pitchlog2;
	const u32 colOfs = ditheredX >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );

	// texel
	const tVideoSample texel = *(const tVideoSample*)( (const u8*) t->data + ( rowOfs | colOfs ) );

	r = (tFixPoint) (tFixPointu) ( (texel & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE) );
	g = (tFixPoint) (tFixPointu) ( (texel & MASK_G) << ( FIX_POINT_PRE - SHIFT_G ) );
	b = (tFixPoint) (tFixPointu) ( (texel & MASK_B) << ( FIX_POINT_PRE - SHIFT_B ) );
}
|
|
|
|
/*
|
|
load a sample from internal texture at position tx,ty to fixpoint
|
|
*/
|
|
#ifndef SOFTWARE_DRIVER_2_BILINEAR
|
|
|
|
// get Sample linear == getSample_fixpoint
|
|
|
|
// Unfiltered sample: fetches the single texel at the fix point position tx,ty
// and returns its colour channels as fix point values (granularity COLOR_MAX).
inline void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
								)
{
	// byte offset of the row and byte offset inside the row
	const u32 rowOfs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
	const u32 colOfs = ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );

	// texel
	const tVideoSample texel = *(const tVideoSample*)( (const u8*) t->data + ( rowOfs | colOfs ) );

	r = (tFixPoint) (tFixPointu) ( (texel & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE) );
	g = (tFixPoint) (tFixPointu) ( (texel & MASK_G) << ( FIX_POINT_PRE - SHIFT_G ) );
	b = (tFixPoint) (tFixPointu) ( (texel & MASK_B) << ( FIX_POINT_PRE - SHIFT_B ) );
}
|
|
|
|
// Unfiltered sample with alpha: fetches the single texel at the fix point
// position tx,ty and returns all four channels as fix point values
// (granularity COLOR_MAX).
inline void getSample_texture ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b,
						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
								)
{
	// byte offset of the row and byte offset inside the row
	const u32 rowOfs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
	const u32 colOfs = ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );

	// texel
	const tVideoSample texel = *(const tVideoSample*)( (const u8*) t->data + ( rowOfs | colOfs ) );

	a = (tFixPoint) (tFixPointu) ( (texel & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE) );
	r = (tFixPoint) (tFixPointu) ( (texel & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE) );
	g = (tFixPoint) (tFixPointu) ( (texel & MASK_G) << ( FIX_POINT_PRE - SHIFT_G ) );
	b = (tFixPoint) (tFixPointu) ( (texel & MASK_B) << ( FIX_POINT_PRE - SHIFT_B ) );
}
|
|
|
|
|
|
#else
|
|
|
|
|
|
// get sample linear
|
|
REALINLINE void getSample_linear ( tFixPointu &r, tFixPointu &g, tFixPointu &b,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
|
|
)
|
|
{
|
|
u32 ofs;
|
|
|
|
ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
// texel
|
|
tVideoSample t00;
|
|
t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
|
|
|
|
r = (t00 & MASK_R) >> SHIFT_R;
|
|
g = (t00 & MASK_G) >> SHIFT_G;
|
|
b = (t00 & MASK_B);
|
|
}
|
|
|
|
// get Sample bilinear
|
|
REALINLINE void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
|
|
)
|
|
{
|
|
|
|
tFixPointu r00,g00,b00;
|
|
tFixPointu r01,g01,b01;
|
|
tFixPointu r10,g10,b10;
|
|
tFixPointu r11,g11,b11;
|
|
|
|
#if 0
|
|
getSample_linear ( r00, g00, b00, t, tx,ty );
|
|
getSample_linear ( r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
|
|
getSample_linear ( r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
|
|
getSample_linear ( r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
|
|
#else
|
|
u32 o0, o1,o2,o3;
|
|
tVideoSample t00;
|
|
|
|
o0 = ( ( (ty) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
o1 = ( ( (ty+FIX_POINT_ONE) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
o2 = ( (tx) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
o3 = ( (tx+FIX_POINT_ONE) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o2 ) ));
|
|
r00 = (t00 & MASK_R) >> SHIFT_R;
|
|
g00 = (t00 & MASK_G) >> SHIFT_G;
|
|
b00 = (t00 & MASK_B);
|
|
|
|
t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o3 ) ));
|
|
r10 = (t00 & MASK_R) >> SHIFT_R;
|
|
g10 = (t00 & MASK_G) >> SHIFT_G;
|
|
b10 = (t00 & MASK_B);
|
|
|
|
t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o2 ) ));
|
|
r01 = (t00 & MASK_R) >> SHIFT_R;
|
|
g01 = (t00 & MASK_G) >> SHIFT_G;
|
|
b01 = (t00 & MASK_B);
|
|
|
|
t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o3 ) ));
|
|
r11 = (t00 & MASK_R) >> SHIFT_R;
|
|
g11 = (t00 & MASK_G) >> SHIFT_G;
|
|
b11 = (t00 & MASK_B);
|
|
|
|
#endif
|
|
|
|
const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
|
|
const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
|
|
|
|
const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
|
|
const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
|
|
|
|
const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
|
|
const tFixPointu w10 = imulFixu ( txFract , tyFractInv );
|
|
const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
|
|
const tFixPointu w11 = imulFixu ( txFract , tyFract );
|
|
|
|
r = (r00 * w00 ) +
|
|
(r01 * w01 ) +
|
|
(r10 * w10 ) +
|
|
(r11 * w11 );
|
|
|
|
g = (g00 * w00 ) +
|
|
(g01 * w01 ) +
|
|
(g10 * w10 ) +
|
|
(g11 * w11 );
|
|
|
|
b = (b00 * w00 ) +
|
|
(b01 * w01 ) +
|
|
(b10 * w10 ) +
|
|
(b11 * w11 );
|
|
|
|
}
|
|
|
|
|
|
// get sample linear
|
|
REALINLINE void getSample_linear ( tFixPointu &a, tFixPointu &r, tFixPointu &g, tFixPointu &b,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
|
|
)
|
|
{
|
|
u32 ofs;
|
|
|
|
ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
|
|
ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
|
|
|
|
// texel
|
|
tVideoSample t00;
|
|
t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
|
|
|
|
a = (t00 & MASK_A) >> SHIFT_A;
|
|
r = (t00 & MASK_R) >> SHIFT_R;
|
|
g = (t00 & MASK_G) >> SHIFT_G;
|
|
b = (t00 & MASK_B);
|
|
}
|
|
|
|
// get Sample bilinear
|
|
REALINLINE void getSample_texture ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b,
|
|
const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
|
|
)
|
|
{
|
|
|
|
tFixPointu a00, r00,g00,b00;
|
|
tFixPointu a01, r01,g01,b01;
|
|
tFixPointu a10, r10,g10,b10;
|
|
tFixPointu a11, r11,g11,b11;
|
|
|
|
getSample_linear ( a00, r00, g00, b00, t, tx,ty );
|
|
getSample_linear ( a10, r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
|
|
getSample_linear ( a01, r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
|
|
getSample_linear ( a11, r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
|
|
|
|
const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
|
|
const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
|
|
|
|
const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
|
|
const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
|
|
|
|
const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
|
|
const tFixPointu w10 = imulFixu ( txFract , tyFractInv );
|
|
const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
|
|
const tFixPointu w11 = imulFixu ( txFract , tyFract );
|
|
|
|
a = (a00 * w00 ) +
|
|
(a01 * w01 ) +
|
|
(a10 * w10 ) +
|
|
(a11 * w11 );
|
|
|
|
r = (r00 * w00 ) +
|
|
(r01 * w01 ) +
|
|
(r10 * w10 ) +
|
|
(r11 * w11 );
|
|
|
|
g = (g00 * w00 ) +
|
|
(g01 * w01 ) +
|
|
(g10 * w10 ) +
|
|
(g11 * w11 );
|
|
|
|
b = (b00 * w00 ) +
|
|
(b01 * w01 ) +
|
|
(b10 * w10 ) +
|
|
(b11 * w11 );
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
// some 2D Defines
|
|
struct AbsRectangle
|
|
{
|
|
s32 x0;
|
|
s32 y0;
|
|
s32 x1;
|
|
s32 y1;
|
|
};
|
|
|
|
//! 2D Intersection test
|
|
inline bool intersect ( AbsRectangle &dest, const AbsRectangle& a, const AbsRectangle& b)
|
|
{
|
|
dest.x0 = core::s32_max( a.x0, b.x0 );
|
|
dest.y0 = core::s32_max( a.y0, b.y0 );
|
|
dest.x1 = core::s32_min( a.x1, b.x1 );
|
|
dest.y1 = core::s32_min( a.y1, b.y1 );
|
|
return dest.x0 < dest.x1 && dest.y0 < dest.y1;
|
|
}
|
|
|
|
// some 1D defines
|
|
struct sIntervall
|
|
{
|
|
s32 start;
|
|
s32 end;
|
|
};
|
|
|
|
// returning intersection width
|
|
inline s32 intervall_intersect_test( const sIntervall& a, const sIntervall& b)
|
|
{
|
|
return core::s32_min( a.end, b.end ) - core::s32_max( a.start, b.start );
|
|
}
|
|
|
|
|
|
} // end namespace irr
|
|
|
|
#endif
|
|
|