I've written a short program to learn about GPU computation by drawing with OpenGL 3. To do so, I created a program that draws two triangles, which form a quad, and passes all the arguments for computing the fractal as uniforms. I assumed uniforms were slower for individual access, so I copy them in the vertex shader into output varyings, so that they can be read efficiently in the fragment shader. Here is my attempt:
Code
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>
#include <GL/glew.h>
#if defined (__APPLE_CC__)
#include <OpenGL/gl3.h>
#else
#include <GL/gl3.h> /* assert OpenGL 3.2 core profile available. */
#endif
#define GLFW_INCLUDE_GL3 /* don't drag in legacy GL headers. */
#define GLFW_NO_GLU /* don't drag in the old GLU lib - unless you must. */
#include <GLFW/glfw3.h>
/* Read the entire contents of `filename` into `buf` as a NUL-terminated
 * string.  Aborts (assert) if the file cannot be opened.
 * NOTE(review): `buf` must be large enough for the whole file plus the
 * terminator -- the caller is responsible for sizing it; consider passing
 * the buffer size and bounding the loop. */
void read_file(const char *filename, char *buf) {
    FILE *file = fopen(filename, "r");
    assert(file);
    /* BUG FIX: fgetc() returns an int so that all 256 byte values plus EOF
     * are distinguishable.  Storing the result in a char either stops early
     * on a 0xFF byte (signed char) or never matches EOF (unsigned char). */
    int c; int i = 0;
    while((c = fgetc(file)) != EOF)
        buf[i++] = (char)c;
    buf[i] = '\0';
    fclose(file);
}
/* Debug-only OpenGL error check: asserts that the preceding GL call left no
 * error flag set.  Expands to nothing when NDEBUG is defined, so release
 * builds pay no glGetError() round-trip cost.  Note the semicolon is baked
 * into the macro, which is why call sites write a bare `GLERROR`. */
#ifndef NDEBUG
#define GLERROR assert(glGetError() == GL_NO_ERROR);
#else
#define GLERROR
#endif
main() {
setup_glfw:;
if(!glfwInit()) {
printf("glfw init fail\n");
return 1;
}
#if defined(__APPLE_CC__)
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
#endif
GLFWwindow *window = glfwCreateWindow(960, 960, "mandelbrot", NULL, NULL);
if(window == NULL) {
printf("window creation epic fail\n");
return 1;
}
glfwMakeContextCurrent(window); GLERROR
glewExperimental = GL_TRUE;
if(glewInit() != GLEW_OK) {
printf("glew init fail\n");
return 1;
}
program:;
GLuint program;
{
program = glCreateProgram(); GLERROR
char vert_src[1024];
const char *vert_src_const = vert_src;
GLuint vert = glCreateShader(GL_VERTEX_SHADER); GLERROR
read_file("mandelbrot.vert", vert_src);
glShaderSource(vert, 1, &vert_src_const, NULL); GLERROR
glCompileShader(vert); GLERROR
char frag_src[1024];
const char *frag_src_const = frag_src;
GLuint frag = glCreateShader(GL_FRAGMENT_SHADER); GLERROR
read_file("mandelbrot.frag", frag_src);
glShaderSource(frag, 1, &frag_src_const, NULL); GLERROR
glCompileShader(frag); GLERROR
glAttachShader(program, vert); GLERROR
glAttachShader(program, frag); GLERROR
glLinkProgram(program); GLERROR
glDeleteShader(vert); GLERROR
glDeleteShader(frag); GLERROR
glBindAttribLocation(program, 0, "position"); GLERROR
}
quad:;
GLuint vbo;
GLuint vao;
{
float coords[] = {
-1,-1,
1,-1,
1,1,
-1,1,
-1,-1,
};
glGenBuffers(1, &vbo); GLERROR
glBindBuffer(GL_ARRAY_BUFFER, vbo); GLERROR
glBufferData(GL_ARRAY_BUFFER, 10 * sizeof(float), coords, GL_STATIC_DRAW); GLERROR
glGenVertexArrays(1, &vao); GLERROR
glBindVertexArray(vao); GLERROR
glEnableVertexAttribArray(0); GLERROR
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, NULL); GLERROR
}
mandelbrot_args:;
float tr_x = -0.63f, tr_y = 0.0f;
GLuint u_translate = glGetUniformLocation(program, "translate"); GLERROR
glProgramUniform2f(program, u_translate, tr_x, tr_y); GLERROR
float sc = 1.5f;
float tr_step = sc/10;
GLuint u_scale = glGetUniformLocation(program, "scale"); GLERROR
glProgramUniform1f(program, u_scale, sc); GLERROR;
GLuint u_max = glGetUniformLocation(program, "maxu"); GLERROR
glProgramUniform1f(program, u_max, 100); GLERROR;
GLuint u_iter = glGetUniformLocation(program, "iteru"); GLERROR
glProgramUniform1f(program, u_iter, 2000); GLERROR;
display:;
while(!glfwWindowShouldClose(window)) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); GLERROR
glUseProgram(program); GLERROR
glDrawArrays(GL_TRIANGLES, 0, 3); GLERROR
glDrawArrays(GL_TRIANGLES, 2, 3); GLERROR
glfwPollEvents(); GLERROR
glfwSwapBuffers(window); GLERROR
{
exit_condition:;
if(glfwGetKey(window, GLFW_KEY_ESCAPE)) {
glfwSetWindowShouldClose(window, 1); GLERROR
}
translation:;
{
bool change_tr = false;
if(glfwGetKey(window, GLFW_KEY_LEFT)) {
tr_x += tr_step, change_tr = true;
} else if(glfwGetKey(window, GLFW_KEY_RIGHT)) {
tr_x -= tr_step, change_tr = true;
} else if(glfwGetKey(window, GLFW_KEY_DOWN)) {
tr_y += tr_step, change_tr = true;
} else if(glfwGetKey(window, GLFW_KEY_UP)) {
tr_y -= tr_step, change_tr = true;
}
if(change_tr) {
glProgramUniform2f(program, u_translate, tr_x, tr_y); GLERROR
}
}
scaling:;
{
bool change_sc = false;
if(glfwGetKey(window, GLFW_KEY_EQUAL)) {
sc /= 1.1, change_sc = true;
} else if(glfwGetKey(window, GLFW_KEY_MINUS)) {
sc *= 1.1, change_sc = true;
}
if(change_sc) {
glProgramUniform1f(program, u_scale, sc); GLERROR
tr_step = sc / 10;
}
}
}
}
end_display:;
glDeleteBuffers(1, &vbo); GLERROR
glDeleteVertexArrays(1, &vao); GLERROR
glDeleteProgram(program); GLERROR
glfwTerminate(); GLERROR
}
Shaders
mandelbrot.vert
#version 330

layout (location = 0) in vec2 position;

// View parameters, forwarded per-vertex so the fragment shader reads
// interpolated varyings instead of uniforms.
uniform vec2  translate;
uniform float scale;
uniform float maxu;
uniform float iteru;

out vec2  coord;  // complex-plane coordinate of this corner
out float max;    // escape threshold, passed through
out float iter;   // iteration budget, passed through

void main(void) {
    gl_Position = vec4(position, 0, 1);
    iter  = iteru;
    max   = maxu;
    // Map the clip-space quad corner into the complex plane.
    coord = position * scale + translate;
}
mandelbrot.frag
#version 330

in vec2  coord;  // complex-plane coordinate c for this fragment
in float max;    // escape threshold
in float iter;   // iteration budget

out vec4 fragcolor;

#define MAX(a, b) (((a) > (b)) ? (a) : (b))

void main(void) {
    // Iterate z -> z^2 + c, counting steps until |z| exceeds the threshold.
    vec2  v = coord;
    float t = 0;
    while(t < iter) {
        if(length(v) > max) {
            break;
        }
        float re = v.x*v.x - v.y*v.y + coord.x;  // real part of z^2 + c
        v.y = 2*v.x*v.y + coord.y;               // imaginary part
        v.x = re;
        ++t;
    }
    // Red ramps over the full budget; green only past the halfway point.
    fragcolor = vec4((t / iter), (MAX(t - iter/2, 0) / iter), 0, 0);
}
Compile as:
OSX:
-framework OpenGL $(pkg-config --libs --cflags glfw3)
Linux:
Fedora
$(pkg-config --libs --cflags libglfw3 gl)
Ubuntu
$(pkg-config --libs --cflags glfw3 gl)
Could you, please, point me to better practices and optimisations?
Edit
I found one way to improve the performance: to turn the while
loop into a for
loop. It seems to become partially unrolled on some devices, and therefore runs faster.
Another useful improvement is to wait for a key press in a while
loop, for better interaction speed.
2 Answers 2
This is pretty cool! Several people have posted their Mandelbrot generators here asking for advice on making them faster, and I always recommend that they implement it on the GPU. Glad to see someone finally did! Nice work!
Shaders
My main concern with your shaders is that the names of your uniforms and attributes aren't as clear as they could be. For example, I assumed max
was the maximum number of iterations and that iter
was the current number of iterations. I would change max
to escapeDistance
(or similar) as it is the distance past which the calculations escape to infinity. I'd change iter
to maxIters
. In general max
is a bad name not only because it's vague (max what?), but also because there are various functions and macros named max
(or MAX
) in most popular languages. It just makes things confusing.
If you want to make your color palette more flexible, I recommend passing in a 1D texture containing a look-up table of colors and using the iterations as an index into it (possibly with wrapping and/or reflection).
Other than that, your shaders are pretty efficient. They don't do more than they need, and there are no texture samples or anything like that, so there's not a lot to improve.
Onto your C code.
Functions
It's not clear to me whether there's some compiler magic that turns your labels into functions or something, but as it's written, the code is far too long to put it all into main()
. I would break it out into individual functions if that's not already happening automatically. Even if it is, it's very weird to see it all written as if it were part of main()
.
File Reading
You're reading your shader files 1 character at a time. While they're short, that's just inefficient. You could at the least use getline() to read a line at a time. Or just check the length of the file, allocate a buffer big enough for it and read it all in in 1 shot.
GLERROR
I would change your GLERROR
from a macro to a function that does the following:
/* Report any pending OpenGL error with the call site's file and line.
 * Returns the error code so callers can react (GL_NO_ERROR when clean). */
GLenum CheckGLError(const char* filename, const int linenum)
{
    GLenum result = glGetError();
    if (result != GL_NO_ERROR)
    {
        /* fixed: the original printed `linen`, which does not compile;
         * also terminate the message with a newline. */
        fprintf(stderr, "OpenGL Error on line %d in file %s: 0x%0x\n", linenum, filename, result);
    }
    return result;
}
And then I'd write a macro that was empty in release builds, but which called the above function in debug. This will remove the calls in release builds. It's very important to call glGetError()
during development, but it can really slow things down in release, so it's best to remove it in release.
Use struct
when appropriate
For drawing your quad, I recommend not using a straight array of float
s. You should use a struct
that describes what your data actually is. I recommend something simple like this:
/* A single 2-D vertex position, matching the 2-float attribute layout. */
typedef struct Point2D {
float x;
float y;
} Point2D;
Then you can define your coordinate array as:
Point2D coord[] = {
{ -1.0, -1.0 },
{ 1.0, -1.0 },
{ 1.0, 1.0 },
{ -1.0, 1.0 },
{ -1.0, -1.0 }
};
Optimizations
I don't see a lot of room for optimizations here. I've not written a compute shader before, so I don't know how that would compare. I don't imagine it would be a lot faster, but one way to find out is to do it and time it and compare with the fragment shader version.
The only opportunity to reduce branching that I see is if you always did the calculation up to max iterations, but stopped counting after reaching the escape distance. This would use one less branch if you did it using a step function to increment the number of iterations used. In pseudo-code it would look something like this:
int numIterations = 0;
for (int i = 0; i < maxIterations; i++)
{
// Only add one if length(v) is less than escape distance
numIterations += 1.0 - step(escapeDistance, length(v));
// continue calculations
}
I can't say without profiling whether that would be any faster though, as you're trading off the number of branches for the number of iterations so it may end up being a wash. But as always, try it and see!
-
\$\begingroup\$ What about actual optimizations? My question is too general, I suppose, but, for example, does it make sense to use a compute shader to vectorise the algorithm? Better ways to reduce branching? When moving screen, the fragments are recomputed, not memoized. +1 for
struct
, I didn't know such practice. I tried to avoid using functions and boilerplates to make the code as short as possible. Labels act like comments, my magic compiler does not turn them into anything :) \$\endgroup\$theoden8– theoden82017年06月18日 12:22:46 +00:00Commented Jun 18, 2017 at 12:22 -
\$\begingroup\$ I've added a section on optimizations above. \$\endgroup\$user1118321– user11183212017年06月18日 16:27:31 +00:00Commented Jun 18, 2017 at 16:27
-
\$\begingroup\$ You might be right that there is no room for optimizations here, but I will keep this question open to see if it is not the case. \$\endgroup\$theoden8– theoden82017年06月18日 19:16:16 +00:00Commented Jun 18, 2017 at 19:16
Avoid double
math
1.1
is a double
so sc /= 1.1;
is like sc = (float)((double) sc / 1.1);
Use a float
constant instead: sc /= 1.1f;
.
Tip: Enable all compiler warnings.
fgetc()
returns 257 different values
Saving fgetc()
result in a char
can lose information and cause an early stop or infinite loop. Use an int
.
// char c; int i = 0;
// while((c = fgetc(file)) != EOF)
int c; int i = 0;
while((c = fgetc(file)) != EOF)
Avoid buffer overruns
Test input length
// void read_file(const char *filename, char *buf) {
// while((c = fgetc(file)) != EOF)
void read_file(const char *filename, size_t size, char *buf) {
size_t i = 0;
while(i + 1 < size && (c = fgetc(file)) != EOF)
buf[i++] = c;
Avoid naked magic numbers
Why 10?:
glBufferData(GL_ARRAY_BUFFER, 10 * sizeof(float), coords, GL_STATIC_DRAW);
Perhaps ?
glBufferData(GL_ARRAY_BUFFER, sizeof coords, coords, GL_STATIC_DRAW);
Why 1024?
char vert_src[1024];
Why missing return type?
main() {