A) Implement a TRUE Flexible Vertex Format (done)
B) Rewrite Mesh Generators and Loaders to create our new flexible meshes (being done)
C) Pick new generic irrlicht mesh (pos+vcol+2d tc+normal) data format (which is now 3bytes thinner) (done)
So our choice was to pick INTEGER_10_10_10_2_REV for vertex normal data, inspired by Crysis 3 improved GBuffer normal quantization, I thought that 3x 32bit float for normals is excessive on static meshes.
So here are some new nice functions which will convert a 3D float direction into 3x 10bit ints or 3x 8bit ints.
The PSNR on our test sphere was > 100 dB, meaning we took the dot product between the TRUE 32bit float normal and the quantized version and were able to amplify the error (1-dot(true,quant)) by a factor of 2000.f before actually seeing a difference on the screen!
But that was only looking at per-vertex error!
When we compared interpolated per-pixel normals, the differences became even less apparent!
The quantization would most probably improve much more if we used double precision floating point as source data.
Code: Select all
// This file is part of the "Irrlicht Engine".
// For conditions of distribution and use, see copyright notice in irrlicht.h
#ifndef __S_VERTEX_MANIPULATOR_H_INCLUDED__
#define __S_VERTEX_MANIPULATOR_H_INCLUDED__
#include <cstring>

#include "vectorSIMD.h"
#include "IMeshBuffer.h"
namespace irr
{
namespace scene
{
//! Interface for vertex manipulators.
/** You should derive your manipulator from this class if it shall be called for every vertex, getting as parameter just the vertex.
*/
struct IVertexManipulator
{
	//intentionally empty tag/base type: concrete manipulators derive from this
	//and provide a templated operator() over the raw attribute data themselves
};
//! Vertex manipulator which scales the position of the vertex
//! Vertex manipulator which multiplies vertex positions by a constant factor.
class SVertexPositionScaleManipulator : public IVertexManipulator
{
	public:
		//! \param factor Per-axis scale to apply to every processed position.
		SVertexPositionScaleManipulator(const core::vector3df& factor) : Factor(factor) {}

		//! Processes one raw attribute; not implemented yet for the flexible vertex format.
		template <typename VType>
		void operator()(void* data, const scene::E_COMPONENTS_PER_ATTRIBUTE &components, const scene::E_COMPONENT_TYPE& type) const
		{
			///vertex.Pos *= Factor;
		}

	private:
		core::vector3df Factor; //!< per-component scale
};
//! Vertex manipulator which transforms the position of the vertex
//! Vertex manipulator which applies a 4x4 transformation to vertex positions.
class SVertexPositionTransformManipulator : public IVertexManipulator
{
	public:
		//! \param m Matrix to apply to every processed position.
		SVertexPositionTransformManipulator(const core::matrix4& m) : Transformation(m) {}

		//! Processes one raw attribute; not implemented yet for the flexible vertex format.
		template <typename VType>
		void operator()(void* data, const scene::E_COMPONENTS_PER_ATTRIBUTE &components, const scene::E_COMPONENT_TYPE& type) const
		{
			///Transformation.transformVect(vertex.Pos);
		}

	private:
		core::matrix4 Transformation; //!< transform applied to positions
};
//! Brute-force search for the integer lattice direction closest to a normal.
/** Works in the positive octant only (on absolute values); the quantizeNormal*
callers re-apply the per-component signs afterwards.
\param bits Bits per component of the target signed format (incl. the sign bit),
so candidate magnitudes live in the cube [0, 2^(bits-1)-1]^3.
\param normal Direction to quantize; only the 3D components are used.
\return Best-fit integer component magnitudes biased by +0.499 so that the
caller's truncating uint32_t cast rounds to nearest; zero vector on
degenerate input. */
inline core::vectorSIMDf findBestFit(const uint32_t& bits, const core::vectorSIMDf& normal)
{
core::vectorSIMDf fittingVector = normal;
fittingVector.makeSafe3D(); //NOTE(review): presumably zeroes the W lane - confirm in vectorSIMD.h
fittingVector = fittingVector.getAbsoluteValue(); //search only the positive octant
core::vectorSIMDf vectorForDots(fittingVector);
vectorForDots /= vectorForDots.getLength(); //precise normalize
//Find which axis holds the largest component; the search steps along that
//axis and, at each step, also tests the 4 lattice corners spanning the
//other two axes (corners[0] stays the default zero offset).
float maxNormalComp;
core::vectorSIMDf corners[4];
core::vectorSIMDf floorOffset;
if (fittingVector.X>fittingVector.Y)
{
maxNormalComp = fittingVector.X;
corners[1].set(0,1,0);
corners[2].set(0,0,1);
corners[3].set(0,1,1);
//floorOffset.set(0.499f,0.f,0.f);
}
else
{
maxNormalComp = fittingVector.Y;
corners[1].set(1,0,0);
corners[2].set(0,0,1);
corners[3].set(1,0,1);
//floorOffset.set(0.f,0.499f,0.f);
}
//second round
if (fittingVector.Z>maxNormalComp)
{
maxNormalComp = fittingVector.Z;
corners[1].set(1,0,0);
corners[2].set(0,1,0);
corners[3].set(1,1,0);
//floorOffset.set(0.f,0.f,0.499f);
}
floorOffset.set(0.499f,0.499f,0.499f); //empirically chosen: works better than the per-axis offsets above (why is unclear)
if (maxNormalComp<=0.577f) //max component of a unit 3d normal cannot be less than sqrt(1/3); degenerate input bails out here
return core::vectorSIMDf(0.f);
//Scale so the dominant component is exactly 1; candidates are then
//floor(fittingVector*n + offset) + corner for each step n along that axis.
fittingVector /= maxNormalComp;
uint32_t cubeHalfSize = (0x1u<<(bits-1))-1;
float closestTo1 = -1.f; //best cosine-vs-length ratio found so far
core::vectorSIMDf bestFit = fittingVector;
for (uint32_t n=1; n<=cubeHalfSize; n++)
{
//we'd use float addition in the interest of speed, to increment the loop
//but adding a small number to a large one loses precision, so multiplication preferable
core::vectorSIMDf bottomFit = fittingVector*float(n);
bottomFit += floorOffset;
bottomFit = floor(bottomFit);
for (uint32_t i=0; i<4; i++)
{
core::vectorSIMDf bottomFitTmp = bottomFit;
if (i)
{
bottomFitTmp += corners[i];
if ((bottomFitTmp>core::vectorSIMDf(cubeHalfSize)).any())
continue; //corner falls outside the representable cube, skip it
}
float bottomFitLen = bottomFitTmp.getLengthAsFloat();//more precise normalization
float dp = bottomFitTmp.dotProductAsFloat(vectorForDots);
//compare dp/len > closestTo1 via cross-multiplication to avoid a divide
//on every rejected candidate
if (dp>closestTo1*bottomFitLen)
{
closestTo1 = dp/bottomFitLen;
bestFit = bottomFitTmp;
}
}
}
return bestFit+0.499f;//+0.499f so the caller's truncating uint32_t cast rounds to nearest
}
//! Quantizes a direction into the signed INT_2_10_10_10_REV vertex format.
/** \param normal Direction to quantize; each component's sign is taken from it.
\return x in bits 0-9, y in bits 10-19, z in bits 20-29; the top 2 bits are zero. */
inline uint32_t quantizeNormal2_10_10_10(const core::vectorSIMDf &normal)
{
	const core::vectorSIMDf fit = findBestFit(10,normal);
	const uint32_t componentMask = (0x1u<<10)-1;

	//findBestFit worked on absolute values, so negate (two's complement
	//within 10 bits) every component whose source direction was negative
	uint32_t quantX = uint32_t(fit.X);
	if (normal.X<0.f)
		quantX = (quantX^componentMask)+1u;
	uint32_t quantY = uint32_t(fit.Y);
	if (normal.Y<0.f)
		quantY = (quantY^componentMask)+1u;
	uint32_t quantZ = uint32_t(fit.Z);
	if (normal.Z<0.f)
		quantZ = (quantZ^componentMask)+1u;

	return (quantX&componentMask)|((quantY&componentMask)<<10)|((quantZ&componentMask)<<20);
}
//! Quantizes a direction into 3x signed 8bit components (byte-normal format).
/** \param normal Direction to quantize; each component's sign is taken from it.
\return The three signed bytes packed in x,y,z memory order; the fourth
(padding) byte is always zero. */
inline uint32_t quantizeNormal888(const core::vectorSIMDf &normal)
{
	core::vectorSIMDf fit = findBestFit(8,normal);

	const uint32_t xorflag = (0x1u<<8)-1;
	uint8_t bestFit[4];
	//two's-complement negation within 8 bits for components with negative sign
	bestFit[0] = (uint32_t(fit.X)^(normal.X<0.f ? xorflag:0))+(normal.X<0.f ? 1:0);
	bestFit[1] = (uint32_t(fit.Y)^(normal.Y<0.f ? xorflag:0))+(normal.Y<0.f ? 1:0);
	bestFit[2] = (uint32_t(fit.Z)^(normal.Z<0.f ? xorflag:0))+(normal.Z<0.f ? 1:0);
	bestFit[3] = 0; //was left uninitialized before, making the packed value nondeterministic

	//memcpy instead of reinterpret_cast: identical bytes in memory order,
	//but no strict-aliasing undefined behaviour
	uint32_t packed;
	memcpy(&packed,bestFit,sizeof(packed));
	return packed;
}/*
ECT_FLOAT=0,
ECT_HALF_FLOAT,
ECT_DOUBLE_IN_FLOAT_OUT,
ECT_UNSIGNED_INT_10F_11F_11F_REV,
//INTEGER FORMS
ECT_NORMALIZED_INT_2_10_10_10_REV,
ECT_NORMALIZED_UNSIGNED_INT_2_10_10_10_REV,
ECT_NORMALIZED_BYTE,
ECT_NORMALIZED_UNSIGNED_BYTE,
ECT_NORMALIZED_SHORT,
ECT_NORMALIZED_UNSIGNED_SHORT,
ECT_NORMALIZED_INT,
ECT_NORMALIZED_UNSIGNED_INT,
ECT_INT_2_10_10_10_REV,
ECT_UNSIGNED_INT_2_10_10_10_REV,
ECT_BYTE,
ECT_UNSIGNED_BYTE,
ECT_SHORT,
ECT_UNSIGNED_SHORT,
ECT_INT,
ECT_UNSIGNED_INT,
ECT_INTEGER_INT_2_10_10_10_REV,
ECT_INTEGER_UNSIGNED_INT_2_10_10_10_REV,
ECT_INTEGER_BYTE,
ECT_INTEGER_UNSIGNED_BYTE,
ECT_INTEGER_SHORT,
ECT_INTEGER_UNSIGNED_SHORT,
ECT_INTEGER_INT,
ECT_INTEGER_UNSIGNED_INT,*/
} // end namespace scene
} // end namespace irr
#endif
In case someone wonders why we didn't make a version for GL_HALF_FLOAT: that's because a 16bit float gives you less precision than a 16bit integer (think about the exponent on a normalized floating-point vector — one bit is never used!).
Plus even in the actually used 15bits of a Half Float, some combinations are invalid (such as when none of the components are bigger than 0.577)
In reality a 32bit INT would have more precision than a 32bit float when quantized properly, so if we ever get Double Precision SIMD vectors I may write a version which quantizes to higher precisions.