For study (and of course fun!) reasons I have implemented a ``cheap’’ boid system using compute shaders.
Here’s the code
import org.openrndr.Fullscreen
import org.openrndr.KEY_ENTER
import org.openrndr.application
import org.openrndr.color.ColorRGBa
import org.openrndr.draw.*
import org.openrndr.extra.gui.GUI
import org.openrndr.extra.parameters.DoubleParameter
import org.openrndr.math.Polar
import org.openrndr.math.Vector2
import org.openrndr.math.Vector3
import org.openrndr.math.transforms.transform
import kotlin.math.PI
fun main() = application {
configure {
fullscreen = Fullscreen.CURRENT_DISPLAY_MODE
}
program {
val computeWidth = 50
val computeHeight = 10
val particleCount = computeWidth * computeHeight * 32
val gridSize = 200
var swapIndex = false
var randomize = false
val computeShader = ComputeShader.fromCode(
"""
#version 430
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
uniform int gridSize;
uniform int computeWidth;
uniform float width;
uniform float height;
uniform float cohesion;
uniform float alignment;
uniform float separation;
uniform float target;
uniform vec2 mousePos;
struct ParticleTransform {
mat4 transform;
};
struct ParticleProperties {
vec2 velocity;
vec2 dir;
};
struct GridProperties {
vec3 pos;
vec2 dir;
};
layout(binding=0) buffer transformsBuffer {
ParticleTransform transforms[];
};
layout(binding=1) buffer propertiesBuffer {
ParticleProperties properties[];
};
layout(binding=2) buffer gridPrev {
GridProperties propsPre[];
};
layout(binding=3) buffer gridNext {
GridProperties propsNext[];
};
void main() {
// Get particle id from global invocation
const uint id = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * computeWidth * gl_WorkGroupSize.x;
ParticleTransform pt = transforms[id];
ParticleProperties pp = properties[id];
vec2 position = vec2(pt.transform[3][0], pt.transform[3][1]);
vec2 velocity = vec2(pp.velocity.x, pp.velocity.y);
vec2 dir = vec2(pp.dir.x, pp.dir.y);
// Get cell id according to position
float stepSize = width/gridSize;
int xGrid = int(position.x/stepSize);
int yGrid = int(position.y/stepSize);
uint idGrid = xGrid + yGrid * gridSize;
// Initialize accumulators
vec2 totPos = vec2(0.0);
vec2 totDir = vec2(0.0);
float count = 0;
float countMax = -1;
int idDensest = -1;
// Get positions and densest cell. No boundary conditions at the moment!
if (xGrid != 0 && yGrid != 0 && xGrid != (gridSize - 1) && yGrid != (gridSize - 1)){
for (int x = -1; x<=1; x++){
for (int y = -1; y<=1; y++){
int cellId = int(mod(idGrid + x + y * gridSize, gridSize * gridSize));
vec3 posTot = propsPre[cellId].pos;
vec2 dirTot = propsPre[cellId].dir;
totPos += posTot.xy;
count += posTot.z;
totDir += dirTot;
if (posTot.z > countMax) {
countMax = posTot.z;
idDensest = cellId;
}
}
}
}
// Get the point with the greatest number of particles in neighbouring cells;
vec2 posDensest = propsPre[idDensest].pos.xy;
if (count > 1.0){
// Get cohesion force
vec2 cohesionForce = (totPos/count - position);
vec2 align = (normalize(totDir/count) - dir);
velocity += cohesionForce * cohesion * 0.01 ;
// Get alignment force
dir += (align - dir) * 0.01;
dir = normalize(dir);
velocity += (dir - velocity) * alignment;
// Get separation force
vec2 separationForce = -(posDensest/countMax - position);
velocity += separationForce * separation * 0.1 ;
}
// Periodic boundary conditions on position
if (position.x < 0 )
{
position.x = width;
}
if (position.x > width )
{
position.x = 0;
}
if (position.y < 0 )
{
position.y = height;
}
if (position.y > height )
{
position.y = 0;
}
// Steer towards mouse position
vec2 desired = normalize(mousePos - position) * 0.1;
vec2 steer = (desired - velocity);
velocity += steer * target;
// Normalize velocity and move particle
velocity = normalize(velocity) * 0.9;
position += velocity;
// Store particle position and direction in the grid
propsNext[idGrid].pos += vec3(position, 1.0);
propsNext[idGrid].dir += dir;
// Update position, velocity and direction
transforms[id].transform[3][0] = position.x;
transforms[id].transform[3][1] = position.y;
properties[id].velocity.x = velocity.x;
properties[id].velocity.y = velocity.y;
properties[id].dir.x = dir.x;
properties[id].dir.y = dir.y;
}
""", "cs"
)
// Vertex buffer which holds the geometry to draw
val geometry = vertexBuffer(vertexFormat {
position(3)
}, 4)
//Create a unit quad
geometry.put {
write(Vector3(-1.0, -1.0, 0.0))
write(Vector3(-1.0, 1.0, 0.0))
write(Vector3(1.0, -1.0, 0.0))
write(Vector3(1.0, 1.0, 0.0))
}
// Vertex buffer holding transformation for each particle
val transformationsBuffer = vertexBuffer(vertexFormat {
attribute("transform", VertexElementType.MATRIX44_FLOAT32)
}, particleCount)
// Vertex buffer containing velocity and direction for each particle
val propertiesBuffer = vertexBuffer(vertexFormat {
attribute("velocity", VertexElementType.VECTOR2_FLOAT32)
attribute("direction", VertexElementType.VECTOR2_FLOAT32)
}, particleCount)
// Vertex buffer containing the previous step of the grid
val bufferA = vertexBuffer(
vertexFormat {
attribute("pos", VertexElementType.VECTOR3_FLOAT32)
attribute("dir", VertexElementType.VECTOR2_FLOAT32)
},
gridSize * gridSize
)
// Vertex buffer containing the next step of the grid
val bufferB = vertexBuffer(
vertexFormat {
attribute("pos", VertexElementType.VECTOR3_FLOAT32)
attribute("dir", VertexElementType.VECTOR2_FLOAT32)
},
gridSize * gridSize
)
// Initialize all buffers
bufferA.put {
for (i in 0 until gridSize * gridSize) {
// velocity
write(Vector3(0.0, 0.0, 0.0))
write(Vector2(0.0, 0.0))
}
}
bufferB.put {
for (i in 0 until gridSize * gridSize) {
// velocity
write(Vector3(0.0, 0.0, 0.0))
write(Vector2(0.0, 0.0))
}
}
// Initialize transform buffer
transformationsBuffer.put {
for (i in 0 until particleCount) {
write(transform {
translate(Math.random() * width, Math.random() * height)
rotate(Vector3.UNIT_Z, Math.random() * 360.0)
scale(2.0)
})
}
}
// Initialize buffer with velocities and directions
propertiesBuffer.put {
for (i in 0 until particleCount) {
// velocity
write(Vector2(Math.random() * .1 - .05, Math.random() * .1 - .05) * 20.0)
write(Vector2.fromPolar(Polar(Math.random() * 2 * PI, 1.0)))
}
}
// Define GUI
val gui = GUI()
val settings = object {
@DoubleParameter("cohesion", 0.0, 0.05)
var cohesion = 0.004
@DoubleParameter("alignment", 0.0, 0.05)
var alignment = 0.004
@DoubleParameter("separation", 0.0, 0.05)
var separation = 0.004
@DoubleParameter("target", 0.0, 0.5)
var target = 0.1
}
computeShader.uniform("computeWidth", computeWidth)
computeShader.uniform("width", width.toDouble())
computeShader.uniform("height", height.toDouble())
computeShader.uniform("gridSize", gridSize)
computeShader.buffer("transformsBuffer", transformationsBuffer)
computeShader.buffer("propertiesBuffer", propertiesBuffer)
computeShader.buffer("gridPrev", bufferA)
computeShader.buffer("gridNext", bufferB)
// Define rendering settings and color buffers for post-processing
val rt = renderTarget(width, height) {
colorBuffer()
}
val prevFrame = colorBuffer(width, height)
val currentFrame = colorBuffer(width, height)
gui.add(settings, "Parameters")
keyboard.keyDown.listen {
if (it.key == KEY_ENTER) {
randomize = true
}
}
extend(gui)
extend {
if (randomize) {
propertiesBuffer.put {
for (i in 0 until particleCount) {
// velocity
write(Vector2((Math.random() * .1 - .05), Math.random() * .1 - .05))
write(Vector2.fromPolar(Polar(Math.random() * 2 * PI, 1.0)))
}
}
randomize = false
}
// Swapping grid information at each step
if (swapIndex) {
//Clear bufferA
bufferA.put {
for (i in 0 until gridSize * gridSize) {
// velocity
write(Vector3(0.0, 0.0, 0.0))
write(Vector2(0.0, 0.0))
}
}
computeShader.buffer("gridPrev", bufferB)
computeShader.buffer("gridNext", bufferA)
} else {
// clear bufferB
bufferB.put {
for (i in 0 until gridSize * gridSize) {
// velocity
write(Vector3(0.0, 0.0, 0.0))
write(Vector2(0.0, 0.0))
}
}
computeShader.buffer("gridPrev", bufferA)
computeShader.buffer("gridNext", bufferB)
}
computeShader.uniform("separation", settings.separation)
computeShader.uniform("alignment", settings.alignment)
computeShader.uniform("cohesion", settings.cohesion)
computeShader.uniform("target", settings.target)
computeShader.uniform("mousePos", mouse.position)
// Render on external target
drawer.isolatedWithTarget(rt) {
drawer.clear(ColorRGBa.BLACK)
fill = ColorRGBa.WHITE.opacify(.1)
shadeStyle = shadeStyle {
vertexTransform = "x_viewMatrix = x_viewMatrix * i_transform;"
}
// Draw instances of quad
vertexBufferInstances(
listOf(geometry), listOf(transformationsBuffer), DrawPrimitive.TRIANGLE_STRIP, particleCount
)
}
computeShader.execute(computeWidth, computeHeight)
rt.colorBuffer(0).copyTo(currentFrame)
drawer.isolatedWithTarget(rt) {
shadeStyle = shadeStyle {
fragmentPreamble = """
vec3 laplacian(in vec2 uv, in sampler2D tex, in vec2 texelSize) {
vec3 rg = vec3(0.0);
rg += texture(tex, uv + vec2(-1.0, -1.0)*texelSize).rgb * 0.05;
rg += texture(tex, uv + vec2(-0.0, -1.0)*texelSize).rgb * 0.2;
rg += texture(tex, uv + vec2(1.0, -1.0)*texelSize).rgb * 0.05;
rg += texture(tex, uv + vec2(-1.0, 0.0)*texelSize).rgb * 0.2;
rg += texture(tex, uv + vec2(0.0, 0.0)*texelSize).rgb * -1;
rg += texture(tex, uv + vec2(1.0, 0.0)*texelSize).rgb * 0.2;
rg += texture(tex, uv + vec2(-1.0, 1.0)*texelSize).rgb * 0.05;
rg += texture(tex, uv + vec2(0.0, 1.0)*texelSize).rgb * 0.2;
rg += texture(tex, uv + vec2(1.0, 1.0)*texelSize).rgb * 0.05;
return rg;
}
""".trimIndent()
fragmentTransform = """
vec2 texCoord = c_boundsPosition.xy;
texCoord.y = 1.0 - texCoord.y;
vec3 currentColor = texture(p_currentFrame, texCoord).rgb;
vec2 size = textureSize(p_pastFrame, 0);
vec3 diffuse = laplacian(texCoord, p_currentFrame, 1.0/size);
vec3 prevColor = texture(p_pastFrame, texCoord).rgb;
x_fill.rgb = currentColor + diffuse * 0.9 + prevColor * 0.94;
""".trimIndent()
parameter("currentFrame", currentFrame)
parameter("pastFrame", prevFrame)
}
drawer.rectangle(drawer.bounds)
}
rt.colorBuffer(0).copyTo(prevFrame)
// Colorize
drawer.isolatedWithTarget(rt) {
drawer.shadeStyle = shadeStyle {
fragmentTransform = """
vec2 texCoord = c_boundsPosition.xy;
texCoord.y = 1.0 - texCoord.y;
vec2 size = textureSize(p_image, 0);
float t = texture(p_image, texCoord).x;
vec3 col = mix(vec3(0.0), mix(vec3(2 * t, 0.0, 0.0), vec3(1.0), min(t * 1.0, 1.0)), min(t * 1.0, 1.0));
x_fill.rgb = col;
""".trimIndent()
parameter("image", prevFrame)
}
drawer.rectangle(drawer.bounds)
}
drawer.image(rt.colorBuffer(0))
swapIndex = !swapIndex
}
}
}
Let me explain why it is a “cheap” (but kinda working) approach to a boids simulation.
Essentially, what I do is to a create a grid buffer to check fixed radius interactions, to avoid looping over all the boids, which is terribly expensive. This is quite the usual story for particle simulation and fluid simulation on GPU, but here’s how the approach above diverges: the grid buffer does not contain particle indices, but rather cumulated quantities like position and direction for each grid cell. The reason is that to properly retrieve particle infos from a grid of indices one heavily requires sorting the particle buffer by grid index (check here and here for a nice explanation). I wanted to avoid sorting a buffer on the GPU with a bitonic sorter or something like that, so I came up with this cumulative approach (I have not really seen this approach around, but I am sure someone already implemented it). In particular, I also store the number of boids that are present in a given grid cell, so I can average the positions and directions to implement local averaged alignment and cohesion behaviors. The separation behavior is harder to approximate with this approach, so I went for separation from the “densest” cell in the neighborhood. Even with all the approximations, it seems to nicely develop a flocking behavior.
Of course any comment/suggestion is welcome!