9
\$\begingroup\$

I've written a short program to learn about doing computation through rendering with OpenGL 3. The program draws two triangles, which form a quad, and passes all the arguments for computing the fractal as uniforms. I assumed uniforms were slower to access individually, so I copy them in the vertex shader to output variables, so that they can be read efficiently in the fragment shader. Here is my attempt:

Code

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>
#include <GL/glew.h>
#if defined (__APPLE_CC__)
 #include <OpenGL/gl3.h>
#else
 #include <GL/gl3.h> /* assert OpenGL 3.2 core profile available. */
#endif
#define GLFW_INCLUDE_GL3 /* don't drag in legacy GL headers. */
#define GLFW_NO_GLU /* don't drag in the old GLU lib - unless you must. */
#include <GLFW/glfw3.h>
/*
 * Reads the whole of `filename` into `buf` as a NUL-terminated string.
 *
 * No buffer length is passed in, so this function cannot bound its writes:
 * the caller must supply a buffer large enough for the file contents plus
 * the terminator.
 *
 * If the file cannot be opened, debug builds stop on the assert; builds with
 * NDEBUG report the error and leave an empty string in `buf`.
 */
void read_file(const char *filename, char *buf) {
    FILE *file = fopen(filename, "r");
    assert(file);
    if (file == NULL) {
        perror(filename);
        buf[0] = '\0';
        return;
    }
    /* fgetc() returns an int so that EOF is distinct from every byte value.
     * Storing the result in a char would make byte 0xFF look like EOF where
     * char is signed, and would never match EOF where char is unsigned. */
    int c;
    size_t i = 0;
    while ((c = fgetc(file)) != EOF) {
        buf[i++] = (char)c;
    }
    buf[i] = '\0';
    fclose(file);
}
/* GLERROR: debug-only check written after GL calls. Unless NDEBUG is defined
 * it expands to an assert that glGetError() reports GL_NO_ERROR. The
 * expansion carries its own trailing semicolon, so call sites may write the
 * macro bare. */
#ifndef NDEBUG
#define GLERROR assert(glGetError() == GL_NO_ERROR);
#else
/* With NDEBUG defined the macro expands to nothing. */
#define GLERROR
#endif
main() {
setup_glfw:;
 if(!glfwInit()) {
 printf("glfw init fail\n");
 return 1;
 }
 #if defined(__APPLE_CC__)
 glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
 glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
 glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
 glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
 #endif
 GLFWwindow *window = glfwCreateWindow(960, 960, "mandelbrot", NULL, NULL);
 if(window == NULL) {
 printf("window creation epic fail\n");
 return 1;
 }
 glfwMakeContextCurrent(window); GLERROR
 glewExperimental = GL_TRUE;
 if(glewInit() != GLEW_OK) {
 printf("glew init fail\n");
 return 1;
 }
program:;
 GLuint program;
 {
 program = glCreateProgram(); GLERROR
 char vert_src[1024];
 const char *vert_src_const = vert_src;
 GLuint vert = glCreateShader(GL_VERTEX_SHADER); GLERROR
 read_file("mandelbrot.vert", vert_src);
 glShaderSource(vert, 1, &vert_src_const, NULL); GLERROR
 glCompileShader(vert); GLERROR
 char frag_src[1024];
 const char *frag_src_const = frag_src;
 GLuint frag = glCreateShader(GL_FRAGMENT_SHADER); GLERROR
 read_file("mandelbrot.frag", frag_src);
 glShaderSource(frag, 1, &frag_src_const, NULL); GLERROR
 glCompileShader(frag); GLERROR
 glAttachShader(program, vert); GLERROR
 glAttachShader(program, frag); GLERROR
 glLinkProgram(program); GLERROR
 glDeleteShader(vert); GLERROR
 glDeleteShader(frag); GLERROR
 glBindAttribLocation(program, 0, "position"); GLERROR
 }
quad:;
 GLuint vbo;
 GLuint vao;
 {
 float coords[] = {
 -1,-1,
 1,-1,
 1,1,
 -1,1,
 -1,-1,
 };
 glGenBuffers(1, &vbo); GLERROR
 glBindBuffer(GL_ARRAY_BUFFER, vbo); GLERROR
 glBufferData(GL_ARRAY_BUFFER, 10 * sizeof(float), coords, GL_STATIC_DRAW); GLERROR
 glGenVertexArrays(1, &vao); GLERROR
 glBindVertexArray(vao); GLERROR
 glEnableVertexAttribArray(0); GLERROR
 glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, NULL); GLERROR
 }
mandelbrot_args:;
 float tr_x = -0.63f, tr_y = 0.0f;
 GLuint u_translate = glGetUniformLocation(program, "translate"); GLERROR
 glProgramUniform2f(program, u_translate, tr_x, tr_y); GLERROR
 float sc = 1.5f;
 float tr_step = sc/10;
 GLuint u_scale = glGetUniformLocation(program, "scale"); GLERROR
 glProgramUniform1f(program, u_scale, sc); GLERROR;
 GLuint u_max = glGetUniformLocation(program, "maxu"); GLERROR
 glProgramUniform1f(program, u_max, 100); GLERROR;
 GLuint u_iter = glGetUniformLocation(program, "iteru"); GLERROR
 glProgramUniform1f(program, u_iter, 2000); GLERROR;
display:;
 while(!glfwWindowShouldClose(window)) {
 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); GLERROR
 glUseProgram(program); GLERROR
 glDrawArrays(GL_TRIANGLES, 0, 3); GLERROR
 glDrawArrays(GL_TRIANGLES, 2, 3); GLERROR
 glfwPollEvents(); GLERROR
 glfwSwapBuffers(window); GLERROR
 {
exit_condition:;
 if(glfwGetKey(window, GLFW_KEY_ESCAPE)) {
 glfwSetWindowShouldClose(window, 1); GLERROR
 }
translation:;
 {
 bool change_tr = false;
 if(glfwGetKey(window, GLFW_KEY_LEFT)) {
 tr_x += tr_step, change_tr = true;
 } else if(glfwGetKey(window, GLFW_KEY_RIGHT)) {
 tr_x -= tr_step, change_tr = true;
 } else if(glfwGetKey(window, GLFW_KEY_DOWN)) {
 tr_y += tr_step, change_tr = true;
 } else if(glfwGetKey(window, GLFW_KEY_UP)) {
 tr_y -= tr_step, change_tr = true;
 }
 if(change_tr) {
 glProgramUniform2f(program, u_translate, tr_x, tr_y); GLERROR
 }
 }
scaling:;
 {
 bool change_sc = false;
 if(glfwGetKey(window, GLFW_KEY_EQUAL)) {
 sc /= 1.1, change_sc = true;
 } else if(glfwGetKey(window, GLFW_KEY_MINUS)) {
 sc *= 1.1, change_sc = true;
 }
 if(change_sc) {
 glProgramUniform1f(program, u_scale, sc); GLERROR
 tr_step = sc / 10;
 }
 }
 }
 }
end_display:;
 glDeleteBuffers(1, &vbo); GLERROR
 glDeleteVertexArrays(1, &vao); GLERROR
 glDeleteProgram(program); GLERROR
 glfwTerminate(); GLERROR
}

Shaders

  • mandelbrot.vert

     #version 330
     // Corner of the quad, passed straight through as the clip-space position.
     layout (location = 0) in vec2 position;
     // View parameters: zoom factor and offset applied to the quad corners.
     uniform vec2 translate;
     uniform float scale;
     // Values copied unchanged into the outputs below.
     uniform float maxu;
     uniform float iteru;
     out vec2 coord;
     out float max;
     out float iter;
     void main(void) {
         coord = translate + scale * position;
         max = maxu;
         iter = iteru;
         gl_Position = vec4(position, 0.0, 1.0);
     }
    
  • mandelbrot.frag

     #version 330
     // Point of the complex plane for this fragment.
     in vec2 coord;
     // Read directly as uniforms; nothing to interpolate per fragment.
     uniform float maxu;  // escape radius (assumed >= 0)
     uniform float iteru; // iteration limit
     out vec4 fragcolor;
     void main(void) {
         // Compare squared magnitudes to avoid a sqrt per iteration.
         float escape2 = maxu * maxu;
         vec2 v = coord;
         float t;
         for (t = 0.0; t < iteru; ++t) {
             if (dot(v, v) > escape2) {
                 break;
             }
             // v = v*v + coord in complex arithmetic.
             v = vec2(v.x * v.x - v.y * v.y, 2.0 * v.x * v.y) + coord;
         }
         // Red ramps over all iterations, green over the upper half.
         fragcolor = vec4(t / iteru, max(t - iteru / 2.0, 0.0) / iteru, 0.0, 0.0);
     }
    

    no description

Compile as:

  • OSX:

    -framework OpenGL $(pkg-config --libs --cflags glfw3)
    
  • Linux:

    • Fedora

      $(pkg-config --libs --cflags libglfw3 gl)
      
    • Ubuntu

      $(pkg-config --libs --cflags glfw3 gl)
      

Could you, please, point me to better practices and optimisations?

Edit

I found one way to improve the performance: to turn the while loop into a for loop. It seems to become partially unrolled on some devices, and therefore runs faster.

Another useful improvement is to wait for a key press in a while loop, for better interaction speed.

Toby Speight
87.9k14 gold badges104 silver badges325 bronze badges
asked Jun 17, 2017 at 18:48
\$\endgroup\$
0

2 Answers 2

7
\$\begingroup\$

This is pretty cool! Several people have posted their Mandelbrot generators here asking for advice on making them faster, and I always recommend that they implement it on the GPU. Glad to see someone finally did! Nice work!

Shaders

My main concern with your shaders is that the names of your uniforms and attributes aren't as clear as they could be. For example, I assumed max was the maximum number of iterations and that iter was the current number of iterations. I would change max to escapeDistance (or similar) as it is the distance past which the calculations escape to infinity. I'd change iter to maxIters. In general max is a bad name not only because it's vague (max what?), but also because there are various functions and macros named max (or MAX) in most popular languages. It just makes things confusing.

If you want to make your color palette more flexible, I recommend passing in a 1D texture containing a look-up table of colors and using the iterations as an index into it (possibly with wrapping and/or reflection).

Other than that, your shaders are pretty efficient. They don't do more than they need, and there are no texture samples or anything like that, so there's not a lot to improve.

Onto your C code.

Functions

It's not clear to me whether there's some compiler magic that turns your labels into functions or something, but as it's written, the code is far too long to put it all into main(). I would break it out into individual functions if that's not already happening automatically. Even if it is, it's very weird to see it all written as if it were part of main().

File Reading

You're reading your shader files 1 character at a time. Even though they're short, that's inefficient. You could at least use getline() (POSIX) to read a line at a time. Or just check the length of the file, allocate a buffer big enough for it, and read it all in one shot.

GLERROR

I would change your GLERROR from a macro to a function that does the following:

/*
 * Fetches the pending OpenGL error with glGetError() and, if there is one,
 * reports it on stderr together with the given source location.
 *
 * filename, linenum: location to report, typically __FILE__ and __LINE__.
 * Returns the value obtained from glGetError() (GL_NO_ERROR when none).
 */
GLenum CheckGLError(const char* filename, const int linenum)
{
    GLenum result = glGetError();
    if (result != GL_NO_ERROR)
    {
        fprintf(stderr, "OpenGL Error on line %d in file %s: 0x%04x\n", linenum, filename, result);
    }
    return result;
}

And then I'd write a macro that was empty in release builds, but which called the above function in debug. This will remove the calls in release builds. It's very important to call glGetError() during development, but it can really slow things down in release, so it's best to remove it in release.

Use struct when appropriate

For drawing your quad, I recommend not using a straight array of floats. You should use a struct that describes what your data actually is. I recommend something simple like this:

/* A 2-D position stored as two consecutive floats: x first, then y. */
typedef struct Point2D {
    float x, y;
} Point2D;

Then you can define your coordinate array as:

Point2D coord[] = {
 { -1.0, -1.0 },
 { 1.0, -1.0 },
 { 1.0, 1.0 },
 { -1.0, 1.0 },
 { -1.0, -1.0 }
};

Optimizations

I don't see a lot of room for optimizations here. I've not written a compute shader before, so I don't know how that would compare. I don't imagine it would be a lot faster, but one way to find out is to do it and time it and compare with the fragment shader version.

The only opportunity to reduce branching that I see is if you always did the calculation up to max iterations, but stopped counting after reaching the escape distance. This would use one less branch if you did it using a step function to increment the number of iterations used. In pseudo-code it would look something like this:

// Accumulated as a float: step() returns a float, and GLSL will not
// implicitly convert a float back to an int.
float numIterations = 0.0;
for (int i = 0; i < maxIterations; i++)
{
 // Only add one if length(v) is less than escape distance
 numIterations += 1.0 - step(escapeDistance, length(v));
 // continue calculations
}

I can't say without profiling whether that would be any faster though, as you're trading off the number of branches for the number of iterations so it may end up being a wash. But as always, try it and see!

answered Jun 18, 2017 at 2:41
\$\endgroup\$
3
  • \$\begingroup\$ What about actual optimizations? My question is too general, I suppose, but, for example, does it make sense to use a compute shader to vectorise the algorithm? Better ways to reduce branching? When moving screen, the fragments are recomputed, not memoized. +1 for struct, I didn't know such practice. I tried to avoid using functions and boilerplates to make the code as short as possible. Labels act like comments, my magic compiler does not turn them into anything :) \$\endgroup\$ Commented Jun 18, 2017 at 12:22
  • \$\begingroup\$ I've added a section on optimizations above. \$\endgroup\$ Commented Jun 18, 2017 at 16:27
  • \$\begingroup\$ You might be right that there is no room for optimizations here, but I will keep this question open to see if it is not the case. \$\endgroup\$ Commented Jun 18, 2017 at 19:16
1
\$\begingroup\$

Avoid double math

1.1 is a double so sc /= 1.1; is like sc = (float)((double) sc / 1.1);

Use a float constant instead: sc /= 1.1f;.

Tip: Enable all compiler warnings.

fgetc() returns 257 different values

Saving fgetc() result in a char can lose information and cause an early stop or infinite loop. Use an int.

// char c; int i = 0;
// while((c = fgetc(file)) != EOF)
int c; int i = 0;
while((c = fgetc(file)) != EOF)

Avoid buffer overruns

Test input length

// void read_file(const char *filename, char *buf) {
// while((c = fgetc(file)) != EOF)
/*
 * Reads at most size - 1 bytes of `filename` into `buf` and NUL-terminates it.
 * Longer files are truncated. If the file cannot be opened, `buf` receives an
 * empty string. Nothing is written when size is 0.
 */
void read_file(const char *filename, size_t size, char *buf) {
    if (size == 0) {
        return;
    }
    size_t i = 0;
    FILE *file = fopen(filename, "r");
    if (file != NULL) {
        int c;  /* int, not char: EOF must stay distinct from every byte */
        while (i + 1 < size && (c = fgetc(file)) != EOF) {
            buf[i++] = (char)c;
        }
        fclose(file);
    }
    buf[i] = '\0';
}

Avoid naked magic numbers

Why 10?:

glBufferData(GL_ARRAY_BUFFER, 10 * sizeof(float), coords, GL_STATIC_DRAW);

Perhaps ?

glBufferData(GL_ARRAY_BUFFER, sizeof coords, coords, GL_STATIC_DRAW);

Why 1024?

char vert_src[1024];

Why missing return type?

main() {

answered Jul 22, 2023 at 11:34
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.