In this post, I want to compare three techniques for drawing the same geometry at many different locations:
- Instancing Technique
- Geometry Shader Technique
- Vertex Shader Technique
Test Scenario
- Video Card: Nvidia GTX 680
- Graphics API: DirectX 11
- No Multisampling
- Back buffer resolution: 1920 x 1080
- Geometry: A box. The center position of each box is generated randomly from a uniform distribution.
In the following picture, you can see 10kk (10 million) boxes that are uniformly distributed.

Instancing Technique
Input Layout
POSITION is the position of each box vertex (per-vertex data), and DIRECTION is the vector used to translate the vertices of the current instance (per-instance data).
// Input layout for the instancing technique. Each entry lists, in order:
// semantic name, semantic index, format, input slot, aligned byte offset,
// input slot class, and instance data step rate.
// The semantic names are string literals because they are matched by name
// against the semantics declared in the vertex shader input struct.
D3D11_INPUT_ELEMENT_DESC inputElementDescriptions[] = {
    // Slot 0: box vertex position (three 32-bit floats), read per vertex.
    { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
    // Slot 1: translation vector (three 32-bit floats), read per instance
    // with a step rate of 1.
    { "DIRECTION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 1, 0, D3D11_INPUT_PER_INSTANCE_DATA, 1 },
};
Buffers
- Vertex buffer: 8 vertices holding the positions of the box corners
- Index buffer: 36 indices that build the 12 triangles of the box
- Instance buffer: NUM_BOXES direction vectors (float3), one per box, each giving the translation applied to every vertex of that box.
Shaders
Vertex shader: Translates vertex position by instanced direction.
// Vertex shader for the instancing technique: moves each box vertex by the
// translation vector of its instance and transforms the result with
// WorldViewProjection.

// Vertex shader inputs.
struct Input {
    // Box vertex position.
    float3 PosOS : POSITION;
    // Translation vector added to PosOS.
    float3 DirOS : DIRECTION;
    // Declared here, but not read by main.
    uint InstanceId : SV_InstanceID;
};

// Vertex shader outputs.
struct Output {
    float4 PosH : SV_POSITION;
    float3 Color : COLOR;
};

cbuffer CBufferPerFrame : register (b0) {
    float4x4 WorldViewProjection;
}

Output main(const Input input, const uint vertexId : SV_VertexId) {
    // Translate the vertex first; the matrix is applied to the moved position.
    const float4 translatedPos = float4(input.PosOS + input.DirOS, 1.0f);

    // vertexId % 3 is 0, 1 or 2, so the shared red/blue value is 0.0, 0.5 or 1.0.
    const float tint = (vertexId % 3) * 0.5f;

    Output result;
    result.PosH = mul(translatedPos, WorldViewProjection);
    result.Color = float3(tint, 0.0f, tint);
    return result;
}
Pixel shader: Returns color only
// Pixel shader for the instancing technique: returns the color received in
// the COLOR input with alpha set to 1.
struct PSInput {
    float4 PosH : SV_POSITION;
    float3 Color : COLOR;
};

float4 main(in PSInput input) : SV_TARGET {
    // Extend the RGB color to RGBA with a constant alpha of 1.
    const float4 opaqueColor = float4(input.Color, 1.0f);
    return opaqueColor;
}
Geometry Shader Technique
Input Layout
The only input element is the center position of each box.
// Input layout for the geometry shader technique. The entry lists, in order:
// semantic name, semantic index, format, input slot, aligned byte offset,
// input slot class, and instance data step rate.
// The semantic name is a string literal because it is matched by name
// against the semantic declared in the vertex shader input struct.
D3D11_INPUT_ELEMENT_DESC inputElementDescriptions[] = {
    // Slot 0: box center position (three 32-bit floats), read per vertex.
    { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
Buffers
- Vertex buffer: NUM_BOXES vertices, each holding the center position of one box.
Shaders
Vertex shader: Coordinate space transformations only
// Vertex shader for the geometry shader technique: transforms the incoming
// position with the World matrix and passes it on; no projection happens here.

// Vertex shader input: one position per vertex.
struct Input {
    float3 PosOS : POSITION;
};

// Vertex shader output: the position after the World transform.
struct Output {
    float4 PosWS : POSITION;
};

cbuffer CBufferPerFrame : register (b0) {
    float4x4 World;
}

Output main(const Input input) {
    // Promote the position to a float4 with w = 1 before the multiply.
    const float4 posOS = float4(input.PosOS, 1.0f);

    Output result;
    result.PosWS = mul(posOS, World);
    return result;
}
Geometry shader: For each input vertex (a box center position), it emits six triangle strips, one per box face (4 vertices * 6 faces = 24 vertices)
// Geometry shader for the geometry shader technique.
// Expands each input point (a box center) into six 4-vertex triangle strips,
// one per box face. Every corner is the center offset by +/-QuadHalfSize on
// each axis and then multiplied by ViewProjection.

// Maximum number of vertices emitted per input point: 4 vertices * 6 faces.
#define CUBE_VERTICES (24)
// Input: the position written by the vertex shader.
struct GSInput {
float4 PosWS : POSITION;
};
// Constants: the matrix applied to every emitted corner, and half of the box
// edge length (corners sit at +/-QuadHalfSize from the center on each axis).
cbuffer cbPerFrame : register (b0) {
float4x4 ViewProjection;
float QuadHalfSize;
};
// Output: transformed corner position plus a per-vertex color.
struct GSOutput {
float4 PosH : SV_POSITION;
float3 Color : COLOR;
};
[maxvertexcount(CUBE_VERTICES)]
void
main(const in point GSInput input[1], inout TriangleStream<GSOutput> triangleStream) {
// Emit the six faces. Each face is its own strip of four vertices colored
// red, green, blue and black, in that order. The offsets have w = 0, so the
// w component of the input position is left unchanged by the addition.
// The order of the Append calls defines the triangles of each strip, so it
// must not be rearranged.
GSOutput output;
// Front face: the four corners with z = -QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
// End this strip so the next face is not connected to it
triangleStream.RestartStrip();
// Back face: the four corners with z = +QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
triangleStream.RestartStrip();
// Left face: the four corners with x = -QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
triangleStream.RestartStrip();
// Right face: the four corners with x = +QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
triangleStream.RestartStrip();
// Top face: the four corners with y = +QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
triangleStream.RestartStrip();
// Bottom face: the four corners with y = -QuadHalfSize
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(1.0f, 0.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, -QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 1.0f, 0.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(-QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 1.0f);
triangleStream.Append(output);
output.PosH = mul(input[0].PosWS + float4(QuadHalfSize, -QuadHalfSize, QuadHalfSize, 0.0f), ViewProjection);
output.Color = float3(0.0f, 0.0f, 0.0f);
triangleStream.Append(output);
// NOTE(review): no vertices follow this call; it is presumably kept only
// for symmetry with the other faces -- confirm before removing.
triangleStream.RestartStrip();
}
Pixel shader: Returns color only
// Pixel shader for the geometry shader technique: returns the color received
// in the COLOR input with alpha set to 1.
struct PSInput {
    float4 PosH : SV_POSITION;
    float3 Color : COLOR;
};

float4 main(in PSInput input) : SV_TARGET {
    // Extend the RGB color to RGBA with a constant alpha of 1.
    const float4 opaqueColor = float4(input.Color, 1.0f);
    return opaqueColor;
}
Vertex Shader Technique
Input Layout
The position of each vertex of each box.
// Input layout for the vertex shader technique. The entry lists, in order:
// semantic name, semantic index, format, input slot, aligned byte offset,
// input slot class, and instance data step rate.
// The semantic name is a string literal because it is matched by name
// against the semantic declared in the vertex shader input struct.
D3D11_INPUT_ELEMENT_DESC inputElementDescriptions[] = {
    // Slot 0: box vertex position (three 32-bit floats), read per vertex.
    { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
Buffers
- Vertex buffer: It contains 8 vertices (positions) per box.
- Index buffer: It contains 36 indices per box, to build 12 triangles per box
Shaders
Vertex shader: Coordinate space transformations only
// Vertex shader for the vertex shader technique: transforms each vertex with
// WorldViewProjection and derives a color from the vertex ID.

// Vertex shader input: one position per vertex.
struct Input {
    float3 PosOS : POSITION;
};

// Vertex shader outputs.
struct Output {
    float4 PosH : SV_POSITION;
    float3 Color : COLOR;
};

cbuffer CBufferPerFrame : register (b0) {
    float4x4 WorldViewProjection;
}

Output main(const Input input, const uint vertexId : SV_VertexId) {
    // Promote the position to a float4 with w = 1 before the multiply.
    const float4 posOS = float4(input.PosOS, 1.0f);

    // vertexId % 3 is 0, 1 or 2, so the shared red/blue value is 0.0, 0.5 or 1.0.
    const float tint = (vertexId % 3) * 0.5f;

    Output result;
    result.PosH = mul(posOS, WorldViewProjection);
    result.Color = float3(tint, 0.0f, tint);
    return result;
}
Pixel shader: Returns color only
// Pixel shader for the vertex shader technique: returns the color received
// in the COLOR input with alpha set to 1.
struct PSInput {
    float4 PosH : SV_POSITION;
    float3 Color : COLOR;
};

float4 main(in PSInput input) : SV_TARGET {
    // Extend the RGB color to RGBA with a constant alpha of 1.
    const float4 opaqueColor = float4(input.Color, 1.0f);
    return opaqueColor;
}
Benchmarks
I tested these three techniques with different numbers of boxes on the machine described above. It is important to mention that the geometry is static: all buffers were generated once at the beginning of the execution, and that time was not taken into account in the FPS computation. These are the results:



Conclusion
There is a clear winner: Geometry Shader Technique. Instancing and Vertex shader techniques have very similar results.
Taking the number of boxes as N = 1kk (1,000,000), let us compute how many bytes each technique sends from the CPU to the GPU in each draw call (with 1 MB = 1024 * 1024 bytes):
- Instancing Technique:
- Vertex Buffer = 8 * 3 * sizeof(float) = 96 bytes +
- Index Buffer = 36 * sizeof(unsigned int) = 144 bytes +
- Instance Buffer = N * 3 * sizeof(float) = 12*N bytes
- Total = 12 * N + 240 bytes = ~11.444321 MB
- Geometry Shader Technique:
- Vertex Buffer = N * 3 * sizeof(float) = 12 * N bytes
- Total = ~11.444092 MB
- Vertex Shader Technique:
- Vertex Buffer = N * 8 * 3 * sizeof(float) = 96 * N bytes +
- Index Buffer = N * 36 * sizeof(unsigned int) = 144 * N bytes
- Total = ~228.881835 MB
There is a clear loser: Vertex Shader Technique. The Instancing and Geometry Shader techniques send a similar amount of data. In fact, the Vertex Shader Technique sends 20x more data than the other two, and the most notable point is that it still has very similar performance to the Instancing Technique.
I was reading other blogs about this, and I found an interesting piece of advice:
“If you are rendering a tiny number of verts per instance, you want to do it without instancing, because all three geometry pipelines take a hit” http://www.joshbarczak.com/blog/?p=667
In the next post, I am going to test these same techniques using more complex geometry, in terms of both generation and size (vertices/indices), such as a sphere.
Leave a comment