OK, to implement this, do the following.
(It's up to you devs whether you add an "Instanced" suffix to each instanced function. By the way, I deliberately didn't make instances=1 a default argument, because I have a feeling that drawing just one instance with glDrawElementsInstancedEXT is slower than drawing one instance with glDrawElements. And, of course, there is your beloved compatibility with the TNT graphics cards.)
We need a new enum value in EDriverFeatures.h
Code: Select all
//! Supports hardware instancing
/** Queried by the OpenGL extension wrappers before they issue an instanced draw call. */
EVDF_DRAW_INSTANCED,
Code: Select all
//! Draws an indexed triangle list of standard vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleList(const S3DVertex* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_STANDARD, scene::EPT_TRIANGLES, EIT_16BIT);
}
//! Draws an indexed triangle list of 2-texcoord vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleList(const S3DVertex2TCoords* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_2TCOORDS, scene::EPT_TRIANGLES, EIT_16BIT);
}
//! Draws an indexed triangle list of tangent vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleList(const S3DVertexTangents* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_TANGENTS, scene::EPT_TRIANGLES, EIT_16BIT);
}
//! Draws an indexed triangle fan of standard vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleFan(const S3DVertex* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_STANDARD, scene::EPT_TRIANGLE_FAN, EIT_16BIT);
}
//! Draws an indexed triangle fan of 2-texcoord vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleFan(const S3DVertex2TCoords* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_2TCOORDS, scene::EPT_TRIANGLE_FAN, EIT_16BIT);
}
//! Draws an indexed triangle fan of tangent vertices several times (instanced).
/** Forwards to drawVertexPrimitiveListInstanced() with 16-bit indices.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the 16-bit index array.
\param triangleCount Number of triangles per instance.
\param instances Number of instances to draw. */
void drawIndexedTriangleFan(const S3DVertexTangents* vertices,
	u32 vertexCount, const u16* indexList, u32 triangleCount, u32 instances)
{
	// 'instances' is the fifth parameter of drawVertexPrimitiveListInstanced();
	// it must precede the vertex/primitive/index type arguments.
	drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, triangleCount, instances, EVT_TANGENTS, scene::EPT_TRIANGLE_FAN, EIT_16BIT);
}
//! Draws a mesh buffer several times (instanced).
/** \param mb Mesh buffer to draw.
\param instances Number of instances to draw. */
virtual void drawMeshBuffer(const scene::IMeshBuffer* mb, u32 instances) =0;
//! Draws a vertex primitive list several times (instanced).
/** Note that 'instances' comes before the type arguments, so it cannot be
defaulted away; callers must always state the instance count.
\param vertices Pointer to the vertex array.
\param vertexCount Number of vertices in the array.
\param indexList Pointer to the index array.
\param primCount Number of primitives per instance.
\param instances Number of instances to draw.
\param vType Vertex type, e.g. EVT_STANDARD.
\param pType Primitive type, e.g. scene::EPT_TRIANGLES.
\param iType Index type, either EIT_16BIT or EIT_32BIT. */
virtual void drawVertexPrimitiveListInstanced(const void* vertices, u32 vertexCount,
const void* indexList, u32 primCount, u32 instances,
E_VERTEX_TYPE vType=EVT_STANDARD,
scene::E_PRIMITIVE_TYPE pType=scene::EPT_TRIANGLES,
E_INDEX_TYPE iType=EIT_16BIT) =0;
Code: Select all
//! Draws a hardware buffer several times (instanced); the null driver does nothing here.
virtual void drawHardwareBuffer(SHWBufferLink *HWBuffer, u32 instances) {}
//! Draws a vertex primitive list several times (instanced).
/** The null driver implementation only does logging and primitive counting. */
virtual void drawVertexPrimitiveListInstanced(const void* vertices, u32 vertexCount,
const void* indexList, u32 primitiveCount, u32 instances,
E_VERTEX_TYPE vType=EVT_STANDARD, scene::E_PRIMITIVE_TYPE pType=scene::EPT_TRIANGLES, E_INDEX_TYPE iType=EIT_16BIT);
//! Draws a mesh buffer several times (instanced), via its hardware buffer link if one exists.
virtual void drawMeshBuffer(const scene::IMeshBuffer* mb,u32 instances);
Code: Select all
//! Common bookkeeping for instanced primitive-list drawing.
/** Rejects (with a log message) every driver type other than OpenGL, warns
about vertex counts that exceed what 16-bit indices can address, and adds the
drawn primitives of all instances to the primitive counter. No geometry is
submitted here. */
void CNullDriver::drawVertexPrimitiveListInstanced(const void* vertices, u32 vertexCount, const void* indexList, u32 primitiveCount, u32 instances, E_VERTEX_TYPE vType, scene::E_PRIMITIVE_TYPE pType, E_INDEX_TYPE iType)
{
	const bool isOpenGL = (video::EDT_OPENGL == getDriverType());
	if (!isOpenGL)
	{
		os::Printer::log("Drawing Hardware Instanced Vertex Primitive Lists is not supported in this driver.");
		return;
	}

	const bool uses16BitIndices = (EIT_16BIT == iType);
	if (uses16BitIndices && vertexCount > 65536)
	{
		os::Printer::log("Too many vertices for 16bit index type, render artifacts may occur.");
	}

	// every instance contributes the full primitive count
	PrimitivesDrawn += instances * primitiveCount;
}
//! Draws a mesh buffer 'instances' times.
/** Uses the hardware buffer link of the mesh buffer when getBufferLink()
returns one; otherwise the vertex and index arrays of the mesh buffer are
drawn directly as a triangle list. A null mesh buffer is ignored. */
void CNullDriver::drawMeshBuffer(const scene::IMeshBuffer* mb, u32 instances)
{
	if (mb == 0)
		return;

	//IVertexBuffer and IIndexBuffer later
	SHWBufferLink* const link = getBufferLink(mb);
	if (!link)
	{
		// no hardware buffer: draw straight from the mesh buffer arrays
		drawVertexPrimitiveListInstanced(mb->getVertices(), mb->getVertexCount(), mb->getIndices(), mb->getIndexCount()/3, instances, mb->getVertexType(), scene::EPT_TRIANGLES, mb->getIndexType());
		return;
	}

	drawHardwareBuffer(link, instances);
}
Code: Select all
// Add these under the general functions.
// Wrappers around the instanced draw entry points (core, ARB or EXT variant).
void extGlDrawArraysInstanced(GLenum mode, GLint first, GLsizei count, GLsizei primcount);
void extGlDrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount);
// Then add these after "PFNGLPROGRAMPARAMETERIEXTPROC pGlProgramParameteriEXT".
// One function pointer per variant (core, ARB, EXT) of each instanced draw call.
PFNGLDRAWARRAYSINSTANCEDPROC pGlDrawArraysInstanced;
PFNGLDRAWARRAYSINSTANCEDARBPROC pGlDrawArraysInstancedARB;
PFNGLDRAWARRAYSINSTANCEDEXTPROC pGlDrawArraysInstancedEXT;
PFNGLDRAWELEMENTSINSTANCEDPROC pGlDrawElementsInstanced;
PFNGLDRAWELEMENTSINSTANCEDARBPROC pGlDrawElementsInstancedARB;
PFNGLDRAWELEMENTSINSTANCEDEXTPROC pGlDrawElementsInstancedEXT;
//then this at the end of the file
//! Issues an instanced, non-indexed draw call.
/** With extension pointers enabled, the call is made only when
EVDF_DRAW_INSTANCED is reported and goes to the first loaded entry point in
the order core, ARB, EXT; if none is loaded nothing is drawn. Without
extension pointers the ARB or EXT function is called directly, or an error is
logged when neither is available at compile time. */
inline void COpenGLExtensionHandler::extGlDrawArraysInstanced(GLenum mode, GLint first, GLsizei count, GLsizei primcount)
{
#if defined(_IRR_OPENGL_USE_EXTPOINTER_)
	if (!queryFeature(EVDF_DRAW_INSTANCED))
		return;

	if (pGlDrawArraysInstanced)
	{
		pGlDrawArraysInstanced(mode, first, count, primcount);
		return;
	}
	if (pGlDrawArraysInstancedARB)
	{
		pGlDrawArraysInstancedARB(mode, first, count, primcount);
		return;
	}
	if (pGlDrawArraysInstancedEXT)
	{
		pGlDrawArraysInstancedEXT(mode, first, count, primcount);
	}
#elif defined(GL_ARB_draw_instanced)
	glDrawArraysInstancedARB(mode, first, count, primcount);
#elif defined(GL_EXT_draw_instanced)
	glDrawArraysInstancedEXT(mode, first, count, primcount);
#else
	os::Printer::log("DrawArraysInstanced not supported", ELL_ERROR);
#endif
}
//! Issues an instanced, indexed draw call.
/** With extension pointers enabled, the call is made only when
EVDF_DRAW_INSTANCED is reported and goes to the first loaded entry point in
the order core, ARB, EXT; if none is loaded nothing is drawn. Without
extension pointers the ARB or EXT function is called directly, or an error is
logged when neither is available at compile time. */
inline void COpenGLExtensionHandler::extGlDrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount)
{
#if defined(_IRR_OPENGL_USE_EXTPOINTER_)
	if (!queryFeature(EVDF_DRAW_INSTANCED))
		return;

	if (pGlDrawElementsInstanced)
	{
		pGlDrawElementsInstanced(mode, count, type, indices, primcount);
		return;
	}
	if (pGlDrawElementsInstancedARB)
	{
		pGlDrawElementsInstancedARB(mode, count, type, indices, primcount);
		return;
	}
	if (pGlDrawElementsInstancedEXT)
	{
		pGlDrawElementsInstancedEXT(mode, count, type, indices, primcount);
	}
#elif defined(GL_ARB_draw_instanced)
	glDrawElementsInstancedARB(mode, count, type, indices, primcount);
#elif defined(GL_EXT_draw_instanced)
	glDrawElementsInstancedEXT(mode, count, type, indices, primcount);
#else
	os::Printer::log("DrawElementsInstanced not supported", ELL_ERROR);
#endif
}
Code: Select all
// Add this to the constructor's initializer list, after
// "pGlProgramParameteriARB(0), pGlProgramParameteriEXT(0),".
// All instanced draw pointers start out null until the extensions are loaded.
pGlDrawArraysInstanced(0), pGlDrawArraysInstancedARB(0), pGlDrawArraysInstancedEXT(0),
pGlDrawElementsInstanced(0), pGlDrawElementsInstancedARB(0),pGlDrawElementsInstancedEXT(0)
// Add these after
// "pGlProgramParameteriARB= (PFNGLPROGRAMPARAMETERIARBPROC) wglGetProcAddress("glProgramParameteriARB");".
// Windows path: look up every variant (core, ARB, EXT) of both instanced draw calls.
pGlDrawArraysInstanced= (PFNGLDRAWARRAYSINSTANCEDPROC) wglGetProcAddress("glDrawArraysInstanced");
pGlDrawArraysInstancedARB= (PFNGLDRAWARRAYSINSTANCEDARBPROC) wglGetProcAddress("glDrawArraysInstancedARB");
pGlDrawArraysInstancedEXT= (PFNGLDRAWARRAYSINSTANCEDEXTPROC) wglGetProcAddress("glDrawArraysInstancedEXT");
pGlDrawElementsInstanced= (PFNGLDRAWELEMENTSINSTANCEDPROC) wglGetProcAddress("glDrawElementsInstanced");
pGlDrawElementsInstancedARB= (PFNGLDRAWELEMENTSINSTANCEDARBPROC) wglGetProcAddress("glDrawElementsInstancedARB");
pGlDrawElementsInstancedEXT= (PFNGLDRAWELEMENTSINSTANCEDEXTPROC) wglGetProcAddress("glDrawElementsInstancedEXT");
// Add these after
// "IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glProgramParameteriEXT"));".
// Same lookups as the wglGetProcAddress variant, done through IRR_OGL_LOAD_EXTENSION.
pGlDrawArraysInstanced= (PFNGLDRAWARRAYSINSTANCEDPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawArraysInstanced"));
pGlDrawArraysInstancedARB= (PFNGLDRAWARRAYSINSTANCEDARBPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawArraysInstancedARB"));
pGlDrawArraysInstancedEXT= (PFNGLDRAWARRAYSINSTANCEDEXTPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawArraysInstancedEXT"));
pGlDrawElementsInstanced= (PFNGLDRAWELEMENTSINSTANCEDPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawElementsInstanced"));
pGlDrawElementsInstancedARB= (PFNGLDRAWELEMENTSINSTANCEDARBPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawElementsInstancedARB"));
pGlDrawElementsInstancedEXT= (PFNGLDRAWELEMENTSINSTANCEDEXTPROC)
IRR_OGL_LOAD_EXTENSION(reinterpret_cast<const GLubyte*>("glDrawElementsInstancedEXT"));
// And this at the very end of the queryFeature function
// (the EVDF_MRT_BLEND_FUNC case is shown only to mark the insertion point).
case EVDF_MRT_BLEND_FUNC:
return FeatureAvailable[IRR_ARB_draw_buffers_blend] || FeatureAvailable[IRR_AMD_draw_buffers_blend];
// Instancing is reported only when ARB_draw_instanced is present together with
// EXT_gpu_shader4 or NV_vertex_program4.
// NOTE(review): a driver exposing only the EXT variant of draw_instanced is
// reported as unsupported here, although the ext wrappers can fall back to
// the EXT entry points - confirm whether that is intended.
case EVDF_DRAW_INSTANCED:
return FeatureAvailable[IRR_ARB_draw_instanced] && (FeatureAvailable[IRR_EXT_gpu_shader4] || FeatureAvailable[IRR_NV_vertex_program4]);
Code: Select all
//public
//! Draws a hardware buffer 'instances' times.
virtual void drawHardwareBuffer(SHWBufferLink *HWBuffer,u32 instances);
//! Draws a vertex primitive list 'instances' times using the instanced GL draw calls.
virtual void drawVertexPrimitiveListInstanced(const void* vertices, u32 vertexCount,
const void* indexList, u32 primitiveCount, u32 instances,
E_VERTEX_TYPE vType, scene::E_PRIMITIVE_TYPE pType, E_INDEX_TYPE iType);
//private
//! Instanced overload of renderArray: maps the primitive type to a GL mode and issues the instanced draw call.
void renderArray(const void* indexList, u32 primitiveCount,
scene::E_PRIMITIVE_TYPE pType, E_INDEX_TYPE iType, u32 instances);
Code: Select all
//! Draws a vertex primitive list 'instances' times with fixed-function client arrays.
/** Sets up the color, vertex, normal and texture coordinate arrays for the
given vertex type, hands the draw to renderArray(), then disables the client
states again.
\param vertices Pointer to the vertex array, or 0 to address the attributes
by byte offset (drawHardwareBuffer() passes 0 after binding a vertex buffer).
\param vertexCount Number of vertices; nothing is drawn when it is 0.
\param indexList Pointer to the index array (passed through to renderArray()).
\param primitiveCount Number of primitives per instance; nothing is drawn when it is 0.
\param instances Number of instances to draw.
\param vType Vertex type selecting the attribute layout.
\param pType Primitive type.
\param iType Index type. */
void COpenGLDriver::drawVertexPrimitiveListInstanced(const void* vertices, u32 vertexCount,
const void* indexList, u32 primitiveCount, u32 instances,
E_VERTEX_TYPE vType, scene::E_PRIMITIVE_TYPE pType, E_INDEX_TYPE iType)
{
// nothing to draw
if (!primitiveCount || !vertexCount)
return;
if (!checkPrimitiveCount(primitiveCount))
return;
// base class part: driver/index-size logging and primitive counting
CNullDriver::drawVertexPrimitiveListInstanced(vertices, vertexCount, indexList, primitiveCount, instances, vType, pType, iType);
// presumably fills ColorBuffer from the vertex colors - confirm against createColorBuffer()
if (vertices)
createColorBuffer(vertices, vertexCount, vType);
// draw everything
setRenderStates3DMode();
if (MultiTextureExtension)
extGlClientActiveTexture(GL_TEXTURE0_ARB);
glEnableClientState(GL_COLOR_ARRAY);
glEnableClientState(GL_VERTEX_ARRAY);
// texture coordinates and normals are enabled for everything except points and point sprites
if ((pType!=scene::EPT_POINTS) && (pType!=scene::EPT_POINT_SPRITES))
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if ((pType!=scene::EPT_POINTS) && (pType!=scene::EPT_POINT_SPRITES))
glEnableClientState(GL_NORMAL_ARRAY);
// client-memory path: colors come tightly packed from ColorBuffer
if (vertices)
glColorPointer(4, GL_UNSIGNED_BYTE, 0, &ColorBuffer[0]);
// In each case below: with 'vertices' set, pointers go to the members of the
// first vertex; without it, byte offsets are used instead.
// NOTE(review): the literal offsets (12, 24, 28, 36, 48) assume the member
// layout of the S3DVertex family - confirm against S3DVertex.h.
switch (vType)
{
case EVT_STANDARD:
if (vertices)
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertex), &(static_cast<const S3DVertex*>(vertices))[0].Normal);
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex), &(static_cast<const S3DVertex*>(vertices))[0].TCoords);
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertex), &(static_cast<const S3DVertex*>(vertices))[0].Pos);
}
else
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertex), buffer_offset(12));
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(S3DVertex), buffer_offset(24));
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex), buffer_offset(28));
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertex), 0);
}
// a second texture reuses the first set of texture coordinates
if (MultiTextureExtension && CurrentTexture[1])
{
extGlClientActiveTexture(GL_TEXTURE1_ARB);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if (vertices)
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex), &(static_cast<const S3DVertex*>(vertices))[0].TCoords);
else
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex), buffer_offset(28));
}
break;
case EVT_2TCOORDS:
if (vertices)
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertex2TCoords), &(static_cast<const S3DVertex2TCoords*>(vertices))[0].Normal);
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex2TCoords), &(static_cast<const S3DVertex2TCoords*>(vertices))[0].TCoords);
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertex2TCoords), &(static_cast<const S3DVertex2TCoords*>(vertices))[0].Pos);
}
else
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertex2TCoords), buffer_offset(12));
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(S3DVertex2TCoords), buffer_offset(24));
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex2TCoords), buffer_offset(28));
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertex2TCoords), buffer_offset(0));
}
// texture unit 1 receives the second set of texture coordinates
if (MultiTextureExtension)
{
extGlClientActiveTexture(GL_TEXTURE1_ARB);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if (vertices)
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex2TCoords), &(static_cast<const S3DVertex2TCoords*>(vertices))[0].TCoords2);
else
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertex2TCoords), buffer_offset(36));
}
break;
case EVT_TANGENTS:
if (vertices)
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertexTangents), &(static_cast<const S3DVertexTangents*>(vertices))[0].Normal);
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertexTangents), &(static_cast<const S3DVertexTangents*>(vertices))[0].TCoords);
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), &(static_cast<const S3DVertexTangents*>(vertices))[0].Pos);
}
else
{
glNormalPointer(GL_FLOAT, sizeof(S3DVertexTangents), buffer_offset(12));
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(S3DVertexTangents), buffer_offset(24));
glTexCoordPointer(2, GL_FLOAT, sizeof(S3DVertexTangents), buffer_offset(28));
glVertexPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), buffer_offset(0));
}
// tangent and binormal are passed as 3-component texture coordinates on units 1 and 2
if (MultiTextureExtension)
{
extGlClientActiveTexture(GL_TEXTURE1_ARB);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if (vertices)
glTexCoordPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), &(static_cast<const S3DVertexTangents*>(vertices))[0].Tangent);
else
glTexCoordPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), buffer_offset(36));
extGlClientActiveTexture(GL_TEXTURE2_ARB);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if (vertices)
glTexCoordPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), &(static_cast<const S3DVertexTangents*>(vertices))[0].Binormal);
else
glTexCoordPointer(3, GL_FLOAT, sizeof(S3DVertexTangents), buffer_offset(48));
}
break;
}
// issue the instanced draw call
renderArray(indexList, primitiveCount, pType, iType, instances);
// switch off the extra texture coordinate arrays, mirroring the conditions under which they were enabled
if (MultiTextureExtension)
{
if (vType==EVT_TANGENTS)
{
extGlClientActiveTexture(GL_TEXTURE2_ARB);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
if ((vType!=EVT_STANDARD) || CurrentTexture[1])
{
extGlClientActiveTexture(GL_TEXTURE1_ARB);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
extGlClientActiveTexture(GL_TEXTURE0_ARB);
}
// these are disabled unconditionally, even where enabling above was conditional
glDisableClientState(GL_COLOR_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
//! Issues the instanced GL draw call for the currently configured client arrays.
/** Points and point sprites are drawn non-indexed through
extGlDrawArraysInstanced(); every other primitive type is drawn indexed
through extGlDrawElementsInstanced(), with the index count derived from
primitiveCount.
\param indexList Pointer to the index array; not used for points and point sprites.
\param primitiveCount Number of primitives per instance.
\param pType Primitive type.
\param iType Index type; selects GL_UNSIGNED_SHORT or GL_UNSIGNED_INT.
\param instances Number of instances to draw. */
void COpenGLDriver::renderArray(const void* indexList, u32 primitiveCount,
scene::E_PRIMITIVE_TYPE pType, E_INDEX_TYPE iType, u32 instances)
{
// GL index data type; stays 0 when iType is neither of the two handled values
GLenum indexSize=0;
switch (iType)
{
case EIT_16BIT:
{
indexSize=GL_UNSIGNED_SHORT;
break;
}
case EIT_32BIT:
{
indexSize=GL_UNSIGNED_INT;
break;
}
}
switch (pType)
{
case scene::EPT_POINTS:
case scene::EPT_POINT_SPRITES:
{
#ifdef GL_ARB_point_sprite
if (pType==scene::EPT_POINT_SPRITES && FeatureAvailable[IRR_ARB_point_sprite])
glEnable(GL_POINT_SPRITE_ARB);
#endif
// prepare size and attenuation (where supported)
GLfloat particleSize=Material.Thickness;
// if (AntiAlias)
// particleSize=core::clamp(particleSize, DimSmoothedPoint[0], DimSmoothedPoint[1]);
// else
particleSize=core::clamp(particleSize, DimAliasedPoint[0], DimAliasedPoint[1]);
#if defined(GL_VERSION_1_4) || defined(GL_ARB_point_parameters) || defined(GL_EXT_point_parameters) || defined(GL_SGIS_point_parameters)
const float att[] = {1.0f, 1.0f, 0.0f};
// same three point parameters in every branch; only the enum names differ per GL version/extension
#if defined(GL_VERSION_1_4)
extGlPointParameterfv(GL_POINT_DISTANCE_ATTENUATION, att);
// extGlPointParameterf(GL_POINT_SIZE_MIN,1.f);
extGlPointParameterf(GL_POINT_SIZE_MAX, particleSize);
extGlPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE, 1.0f);
#elif defined(GL_ARB_point_parameters)
extGlPointParameterfv(GL_POINT_DISTANCE_ATTENUATION_ARB, att);
// extGlPointParameterf(GL_POINT_SIZE_MIN_ARB,1.f);
extGlPointParameterf(GL_POINT_SIZE_MAX_ARB, particleSize);
extGlPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE_ARB, 1.0f);
#elif defined(GL_EXT_point_parameters)
extGlPointParameterfv(GL_DISTANCE_ATTENUATION_EXT, att);
// extGlPointParameterf(GL_POINT_SIZE_MIN_EXT,1.f);
extGlPointParameterf(GL_POINT_SIZE_MAX_EXT, particleSize);
extGlPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE_EXT, 1.0f);
#elif defined(GL_SGIS_point_parameters)
extGlPointParameterfv(GL_DISTANCE_ATTENUATION_SGIS, att);
// extGlPointParameterf(GL_POINT_SIZE_MIN_SGIS,1.f);
extGlPointParameterf(GL_POINT_SIZE_MAX_SGIS, particleSize);
extGlPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE_SGIS, 1.0f);
#endif
#endif
glPointSize(particleSize);
#ifdef GL_ARB_point_sprite
if (pType==scene::EPT_POINT_SPRITES && FeatureAvailable[IRR_ARB_point_sprite])
glTexEnvf(GL_POINT_SPRITE_ARB,GL_COORD_REPLACE, GL_TRUE);
#endif
// non-indexed: primitiveCount is used as the number of points, indexList is ignored
extGlDrawArraysInstanced(GL_POINTS, 0, primitiveCount,instances);
#ifdef GL_ARB_point_sprite
// restore the point sprite state changed above
if (pType==scene::EPT_POINT_SPRITES && FeatureAvailable[IRR_ARB_point_sprite])
{
glDisable(GL_POINT_SPRITE_ARB);
glTexEnvf(GL_POINT_SPRITE_ARB,GL_COORD_REPLACE, GL_FALSE);
}
#endif
}
break;
// indexed primitives: the second argument is the index count for primitiveCount primitives
case scene::EPT_LINE_STRIP:
extGlDrawElementsInstanced(GL_LINE_STRIP, primitiveCount+1, indexSize, indexList,instances);
break;
case scene::EPT_LINE_LOOP:
extGlDrawElementsInstanced(GL_LINE_LOOP, primitiveCount, indexSize, indexList,instances);
break;
case scene::EPT_LINES:
extGlDrawElementsInstanced(GL_LINES, primitiveCount*2, indexSize, indexList,instances);
break;
case scene::EPT_TRIANGLE_STRIP:
extGlDrawElementsInstanced(GL_TRIANGLE_STRIP, primitiveCount+2, indexSize, indexList,instances);
break;
case scene::EPT_TRIANGLE_FAN:
extGlDrawElementsInstanced(GL_TRIANGLE_FAN, primitiveCount+2, indexSize, indexList,instances);
break;
case scene::EPT_TRIANGLES:
extGlDrawElementsInstanced(GL_TRIANGLES, primitiveCount*3, indexSize, indexList,instances);
break;
case scene::EPT_QUAD_STRIP:
extGlDrawElementsInstanced(GL_QUAD_STRIP, primitiveCount*2+2, indexSize, indexList,instances);
break;
case scene::EPT_QUADS:
extGlDrawElementsInstanced(GL_QUADS, primitiveCount*4, indexSize, indexList,instances);
break;
case scene::EPT_POLYGON:
extGlDrawElementsInstanced(GL_POLYGON, primitiveCount, indexSize, indexList,instances);
break;
}
}
//! Draws a hardware buffer 'instances' times.
/** Refreshes the buffer if needed and resets its LastUsed counter. For every
part (vertices, indices) whose mapping hint is not EHM_NEVER the matching GL
buffer object is bound and a null pointer is passed on instead of the
mesh buffer array; the bindings are reset to 0 after the draw. The mesh is
always drawn as a triangle list. */
void COpenGLDriver::drawHardwareBuffer(SHWBufferLink *_HWBuffer, u32 instances)
{
	if (_HWBuffer == 0)
		return;

	updateHardwareBuffer(_HWBuffer); //check if update is needed
	_HWBuffer->LastUsed = 0; //reset count

#if defined(GL_ARB_vertex_buffer_object)
	SHWBufferLink_opengl* const glLink = static_cast<SHWBufferLink_opengl*>(_HWBuffer);
	const scene::IMeshBuffer* const meshBuffer = glLink->MeshBuffer;

	const void* vertexData = meshBuffer->getVertices();
	const void* indexData = meshBuffer->getIndices();

	const bool verticesInBufferObject = (glLink->Mapped_Vertex != scene::EHM_NEVER);
	const bool indicesInBufferObject = (glLink->Mapped_Index != scene::EHM_NEVER);

	if (verticesInBufferObject)
	{
		extGlBindBuffer(GL_ARRAY_BUFFER, glLink->vbo_verticesID);
		vertexData = 0;
	}
	if (indicesInBufferObject)
	{
		extGlBindBuffer(GL_ELEMENT_ARRAY_BUFFER, glLink->vbo_indicesID);
		indexData = 0;
	}

	drawVertexPrimitiveListInstanced(vertexData, meshBuffer->getVertexCount(), indexData, meshBuffer->getIndexCount()/3, instances, meshBuffer->getVertexType(), scene::EPT_TRIANGLES, meshBuffer->getIndexType());

	// unbind whatever was bound above
	if (verticesInBufferObject)
		extGlBindBuffer(GL_ARRAY_BUFFER, 0);
	if (indicesInBufferObject)
		extGlBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
#endif
}
AND OF COURSE THE EXAMPLE (uncomment the commented-out code to switch between hardware and pseudo-instancing)
main.cpp
Code: Select all
#include <irrlicht.h>
using namespace irr;
using namespace core;
using namespace scene;
using namespace video;
using namespace io;
using namespace gui;
// Engine objects shared by main() and initIrrlicht(); all of them are set in initIrrlicht().
IrrlichtDevice* device = 0;
scene::ISceneManager* smgr = 0;
video::IVideoDriver* driver = 0;
gui::IGUIEnvironment* env = 0;
scene::ISceneCollisionManager* coll = 0;
scene::IMeshManipulator* mm = 0;
video::IGPUProgrammingServices* gpu = 0;
// Creates the device and fills the globals above; returns false if the device could not be created.
bool initIrrlicht(s32 w, s32 h, bool opengl = true, s32 bpp=32, bool fullscreen=false, bool stentil = false);
// Maximum number of instances per draw call; must match NUM_BATCH_INSTANCES in the vertex shader.
const int NUM_BATCH_INSTANCES = 248;
// World matrices of the batch currently being drawn, uploaded by InstancingShaderCB.
core::matrix4 instanceWorldArray[NUM_BATCH_INSTANCES];
// One empty scene node per instance; only their absolute transformations are used.
core::array<ISceneNode*> totalNodeArray;
// Projection * view of the current frame, uploaded by InstancingShaderCB.
core::matrix4 viewProjection;
//! Shader callback that uploads the per-batch world matrices and the view-projection matrix.
struct InstancingShaderCB : public video::IShaderConstantSetCallBack
{
	//! Sets the "instanceWorldArray" and "viewProjection" vertex shader constants.
	void OnSetConstants(video::IMaterialRendererServices* services,
		s32 userData)
	{
		// 16 floats per 4x4 matrix
		const int floatsPerMatrix = 16;
		f32* const worldMatrices = (f32*)instanceWorldArray;

		services->setVertexShaderConstant("instanceWorldArray", worldMatrices, floatsPerMatrix*NUM_BATCH_INSTANCES);
		services->setVertexShaderConstant("viewProjection", viewProjection.pointer(), floatsPerMatrix);
	}
};
int main()
{
if (!initIrrlicht(800,800))
return -1;
device->setWindowCaption(L"instancing");
//!mesh generation
IMesh* aMesh = smgr->getGeometryCreator()->createSphereMesh(4,4,4);
IMeshBuffer* bBuffer = aMesh->getMeshBuffer(0);
bBuffer->setHardwareMappingHint(scene::EHM_STATIC);/*
IMesh* bMesh = mm->createMeshWith2TCoords(aMesh);
IMeshBuffer* bBuffer = bMesh->getMeshBuffer(0);
//!create dupBuffer with bBuffer repeated NUM_BATCH_INSTANCES times
SMeshBufferLightMap dupBuffer;
for (int k=0;k<NUM_BATCH_INSTANCES;k++)
{
S3DVertex2TCoords* verts = (S3DVertex2TCoords*)bBuffer->getVertices();
for (u32 i=0; i<bBuffer->getVertexCount(); i++)
{
verts[i].TCoords2.X = k;//assign the index of instance that each vertex belongs to
}
dupBuffer.append(verts,bBuffer->getVertexCount(),bBuffer->getIndices(),bBuffer->getIndexCount());
dupBuffer.setHardwareMappingHint(scene::EHM_STATIC);
}*/
//!save transformation in one EmptySceneNode which doesn't render itself
f32 scale = 18.f;
for (u32 i=0;i<18;i++)
for (u32 j=0;j<18;j++)
for (u32 k=0;k<18;k++)
{
ISceneNode* empty = smgr->addEmptySceneNode();
empty->setPosition(vector3df(i*scale,j*scale,k*scale));
empty->setScale(vector3df(1+rand()%2));
empty->setRotation(vector3df(rand()%360,rand()%360,rand()%360));
totalNodeArray.push_back(empty);
}
scene::ICameraSceneNode* cam = smgr->addCameraSceneNodeFPS();
SMaterial mtrl;
//shader
InstancingShaderCB* callback = new InstancingShaderCB();
s32 mtrlShader = gpu->addHighLevelShaderMaterialFromFiles("../../media/instancing.vert", "", video::EVST_VS_2_0,
"../../media/instancing.frag", "", video::EPST_PS_2_0,callback, video::EMT_SOLID);
callback->drop();
mtrl.Lighting = false;
mtrl.setTexture(0, driver->getTexture("../../media/fire.bmp"));
mtrl.MaterialType = (video::E_MATERIAL_TYPE)mtrlShader;
device->getCursorControl()->setVisible(false);
s32 lastFPS = -1;
while(device->run())
{
if (device->isWindowActive())
{
driver->beginScene(true, true, SColor(255,122,122,122));
smgr->drawAll();
viewProjection = driver->getTransform(video::ETS_PROJECTION);
viewProjection *= driver->getTransform(video::ETS_VIEW);
{
driver->setMaterial(mtrl);
int nRemainingBoxes = totalNodeArray.size();
int node_idx = 0;
while( nRemainingBoxes > 0)
{
int nRenderBoxes = core::min_( nRemainingBoxes, NUM_BATCH_INSTANCES );
nRemainingBoxes -= nRenderBoxes;
for (int i=0;i<nRenderBoxes; i++)
instanceWorldArray[i] = totalNodeArray[node_idx++]->getAbsoluteTransformation();
driver->drawMeshBuffer(bBuffer,nRenderBoxes);
//driver->drawMeshBuffer(&dupBuffer);
}
}
env->drawAll();
driver->endScene();
int fps = driver->getFPS();
if (lastFPS != fps)
{
core::stringw str = L"fps: ";
str += fps;
str += ", poly: ";
str += driver->getPrimitiveCountDrawn();
device->setWindowCaption(str.c_str());
lastFPS = fps;
}
}
}
device->drop();
return 0;
}
//! Creates the Irrlicht device and fills the global engine pointers.
/** \param w Window width in pixels.
\param h Window height in pixels.
\param opengl True selects EDT_OPENGL, false EDT_DIRECT3D9.
\param bpp Value for the Bits creation parameter.
\param fullscreen Value for the Fullscreen creation parameter.
\param stentil Value for the Stencilbuffer creation parameter.
\return True if the device was created, false otherwise. */
bool initIrrlicht(s32 w, s32 h, bool opengl, s32 bpp, bool fullscreen, bool stentil)
{
	SIrrlichtCreationParameters creation;
	creation.DriverType = opengl ? EDT_OPENGL : EDT_DIRECT3D9;
	creation.WindowSize = core::dimension2d<u32>(w, h);
	creation.Bits = bpp;
	creation.Fullscreen = fullscreen;
	creation.Stencilbuffer = stentil;
	creation.AntiAlias = true;

	device = createDeviceEx(creation);
	if (!device)
		return false;

	// cache the sub-systems the example works with
	driver = device->getVideoDriver();
	gpu = driver->getGPUProgrammingServices();
	env = device->getGUIEnvironment();
	//driver->setTextureCreationFlag(ETCF_ALWAYS_32_BIT,true);
	smgr = device->getSceneManager();
	coll = smgr->getSceneCollisionManager();
	mm = smgr->getMeshManipulator();
	//setCursorVisible(false);
	return true;
}
Code: Select all
// Vertex shader for hardware instancing: each instance picks its own world
// matrix from instanceWorldArray by its instance index.
#extension GL_ARB_draw_instanced : enable
uniform mat4 viewProjection;
// must match NUM_BATCH_INSTANCES in main.cpp
#define NUM_BATCH_INSTANCES 248
uniform mat4 instanceWorldArray[NUM_BATCH_INSTANCES];
void main()
{
	// pseudo-instancing variant: the instance index is stored in the second texture coordinate
	//int index = int(gl_MultiTexCoord1.x);
	//mat4 WVP = viewProjection*instanceWorldArray[index];
	// GL_ARB_draw_instanced names the built-in gl_InstanceIDARB; plain
	// gl_InstanceID is not defined by this extension and only compiles on
	// lenient drivers.
	mat4 WVP = viewProjection*instanceWorldArray[gl_InstanceIDARB];
	gl_Position = WVP*gl_Vertex;
	gl_TexCoord[0] = gl_MultiTexCoord0;
}
Code: Select all
// Fragment shader: outputs the unmodified texel of texture unit tex0.
uniform sampler2D tex0;
void main( void )
{
	vec2 uv = gl_TexCoord[0].xy;
	gl_FragColor = texture2D(tex0, uv);
}
The funny thing is that the instancing performs the same (30 fps) whether it draws 200k polygons or 10 million, so it is the number of instances that matters.