I am writing a utility class to store RGB images and do a few utility functions as a way to learn C++.
#include <fstream>
#include <iostream>
#include <numeric>
#include <cmath>
using namespace std;
#define RGB_H 203 // CHANGE THIS DEPENDING ON THE IMAGE NOTE 1512 before 810
#define RGB_W 360 // CHANGE THIS DEPENDING ON THE IMAGE NOTE 2016 before 1440
#define RGB_D 3
#define MAX_COLOR 255
#define R_COEFF 0.2989
#define G_COEFF 0.5870
#define B_COEFF 0.1140
#define PI 3.141592653
class RGBImage {
private:
public:
int height, width;
unsigned int img[RGB_H][RGB_W][RGB_D];
RGBImage() {
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
for(short d = 0; d < RGB_D; d++) {
img[i][j][d] = 127;
}
}
}
}
RGBImage(string filepath) {
ifstream fin(filepath);
string a;
int y, x, w;
fin >> a >> y >> x >> w;
for (size_t y = 0; y < RGB_H; y++) {
for (size_t x = 0; x < RGB_W; x++)
{
unsigned int red, green, blue;
fin >> red >> green >> blue;
if (!fin)
{
std::cerr << "Error reading from file around (" << y << "," << x << ")" << std::endl;
return;
}
img[y][x][0] = red;
img[y][x][1] = green;
img[y][x][2] = blue;
}
}
}
~RGBImage() {}
void write(string filepath) {
ofstream MyFile;
MyFile.open(filepath, ios::out);
MyFile << "P3" << RGB_W << " " << RGB_H << " " << MAX_COLOR << endl ;
for(size_t i = 0; i < RGB_H; i++) {
if(i == 0) {
MyFile << endl;
}
for(size_t j = 0; j < RGB_W; j++) {
unsigned int r = img[i][j][0];
unsigned int g = img[i][j][1];
unsigned int b = img[i][j][2];
MyFile << r << " " << g << " " << b << " ";
}
}
MyFile.close();
}
// reduces the number of colors in each channel by quantizing
void reduceColors(short colFactor) {
int numColors = MAX_COLOR / colFactor;
if(numColors > MAX_COLOR) {
return;
}
int temp;
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
for(short d = 0; d < RGB_D; d++) {
temp = img[i][j][d] / numColors;
temp *= numColors;
img[i][j][d] = temp;
}
}
}
}
// makes the image dimmer
void dim(float dimFactor) {
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
for(short d = 0; d < RGB_D; d++) {
img[i][j][d] *= dimFactor;
}
}
}
}
// converts self to a grayscale image
void toGrayscale() {
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
unsigned int r = img[i][j][0];
unsigned int g = img[i][j][1];
unsigned int b = img[i][j][2];
// unsigned int result = (r + g + b)/3;
unsigned int result = r * R_COEFF + g * G_COEFF + b * B_COEFF;
img[i][j][0] = result;
img[i][j][1] = result;
img[i][j][2] = result;
}
}
}
void binaryThresh(int threshValue) {
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
for(short d = 0; d < RGB_D; d++) {
if(img[i][j][d] > threshValue) {
img[i][j][d] = MAX_COLOR;
}
else {
img[i][j][d] = 0;
}
}
}
}
}
// returns index 0 and also zero pads the result
int getGrayscale(int i, int j) {
if(i < 0 || j < 0 || i > RGB_H || i > RGB_W) {
return 0; // zero padding
}
return img[i][j][0];
}
void setGrayscale(int i, int j, unsigned int val) {
img[i][j][0] = val;
img[i][j][1] = val;
img[i][j][2] = val;
}
RGBImage conv2d(std::vector<std::vector<int>> kernel) {
RGBImage out = RGBImage();
int kernelSum = 0;
for(size_t a = 0; a < kernel.size(); a++) {
kernelSum += accumulate(kernel[a].begin(), kernel[a].end(), 0);
}
if(kernelSum == 0) {
kernelSum = 1;
}
int total;
for (size_t y = 1; y < RGB_H - 1; y++) {
for (size_t x = 1; x < RGB_W - 1; x++) {
total = 0;
for (short relativeX = -1; relativeX <= 1; relativeX++) { // NOTE hardcoded 3x3 convolution kernel size
for (short relativeY = -1; relativeY <= 1; relativeY++) {
total += getGrayscale(y + relativeY, x + relativeX) * kernel[relativeY + 1][relativeX + 1];;
}
}
out.img[y][x][0] = total/kernelSum;
out.img[y][x][1] = total/kernelSum;
out.img[y][x][2] = total/kernelSum;
// setGrayscale(y, x, total/kernelSum);
}
}
return out;
}
RGBImage canny(std::vector<std::vector<int>> sobelx, std::vector<std::vector<int>> sobely, RGBImage &other, int low, int high) {
// currently only uses theta to calculate gradient
// first, convolve with sobelx and sobely
static RGBImage sobelxImg = other.conv2d(sobelx);
static RGBImage sobelyImg = other.conv2d(sobely);
static RGBImage out = RGBImage();
float(*thetas)[RGB_W] = new float[RGB_H][RGB_W]; // matrix
// put together, calculate theta
for(size_t i = 0; i < RGB_H; i++) {
for(size_t j = 0; j < RGB_W; j++) {
int gx = sobelxImg.getGrayscale(i, j);
int gy = sobelyImg.getGrayscale(i, j);
int mag = sqrt(gx*gx + gy*gy);
float theta = atan2f(gy, gx);
out.setGrayscale(i, j, mag);
thetas[i][j] = theta;
}
}
int G;
int otherAvgG;
for(size_t i = 1; i < RGB_H - 1; i++) {
for(size_t j = 1; j < RGB_W - 1; j++) {
float theta = thetas[i][j];
// if(theta + PI/2 > PI) {
// theta -= PI/2;
// }
// else {
// theta += PI/2;
// }
float theta2 = theta;
if(theta < 0) {
theta2 = theta + 2 * PI;
}
// cout << "theta is " << theta << " at " << i << ", " << j << endl;
G = out.getGrayscale(i, j);
if((theta > -PI/8 && theta < PI/8) || (theta > 7*PI/8 && theta2 < 9*PI/8)) { // right/left
otherAvgG = (out.getGrayscale(i, j-1) + out.getGrayscale(i, j+1))/2;
if(G < otherAvgG) {
out.setGrayscale(i, j, 0);
}
}
else if((PI/8 < theta && theta < 3*PI/8) || (-7*PI/8 < theta && theta < 5*PI/8)) { // diagonals, upright, downleft
otherAvgG = (out.getGrayscale(i-1, j+1) + out.getGrayscale(i+1, j-1))/2;
if(G < otherAvgG) {
out.setGrayscale(i, j, 0);
}
}
else if((5*PI/8 < theta && theta < 7*PI/8) || (-3*PI/8 < theta && theta < -PI/8)) { // diagonals, upleft, downright
otherAvgG = (out.getGrayscale(i-1, j-1) + out.getGrayscale(i+1, j+1))/2;
if(G < otherAvgG) {
out.setGrayscale(i, j, 0);
}
}
else if((3*PI/8 < theta && theta < 5*PI/8) || (-5*PI/8 < theta && theta < -3*PI/8)) { // up/down
otherAvgG = (out.getGrayscale(i-1, j) + out.getGrayscale(i+1, j))/2;
if(G < otherAvgG) {
out.setGrayscale(i, j, 0);
}
}
// else {
// cout << "error at: " << i << ", " << j << ", theta is: " << theta << endl;
// }
}
}
// loop over again do a simple double threshold with high/low
for(size_t i = 1; i < RGB_H - 1; i++) {
for(size_t j = 1; j < RGB_W - 1; j++) {
if(getGrayscale(i, j) < low) { // not an edge pixel
out.setGrayscale(i, j, 0);
// out.img[i][j][2] = 255;
}
else if(getGrayscale(i, j) > high) { // strong edge pixel
out.setGrayscale(i, j, 255);
// out.img[i][j][1] = 255;
}
else { // weak edge pixel
// out.img[i][j][0] = 255;
out.setGrayscale(i, j, 127);
}
}
}
return out;
}
void thinEdges(RGBImage &img) {
// loop until image has not changed
bool changed = true;
while (changed) {
changed = false;
// loop over the pixels in the image
for (size_t y = 1; y < RGB_H - 1; y++) {
for (size_t x = 1; x < RGB_W - 1; x++) {
int G = img.getGrayscale(y, x);
if (G != 255) {
// pixel is not a strong edge pixel, skip it
continue;
}
else { // weak edge pixel, or nothing
// check if pixel is aligned with its neighbors
bool aligned = false;
int topG = img.getGrayscale(y - 1, x);
int bottomG = img.getGrayscale(y + 1, x);
int leftG = img.getGrayscale(y, x - 1);
int rightG = img.getGrayscale(y, x + 1);
if (topG == 255 || bottomG == 255) {
// pixel is aligned with its vertical neighbors
aligned = true;
}
else if (leftG == 255 || rightG == 255) {
// pixel is aligned with its horizontal neighbors
aligned = true;
}
if (aligned) {
// pixel is a weak edge pixel and aligned with its neighbors, remove it
img.setGrayscale(y, x, 0);
changed = true;
}
}
}
}
}
}
void combineImgs(const RGBImage &img1, const RGBImage &img2) {
for(int i = 0; i < RGB_H; i++) {
for(int j = 0; j < RGB_W; j++) {
for(short d = 0; d < RGB_D; d++) {
// img[i][j][d] = (unsigned int)sqrtf(pow(img1.img[i][j][d], 2) + pow(img2.img[i][j][d], 2));
img[i][j][d] = (unsigned int)(img1.img[i][j][d] + img2.img[i][j][d]);
// cout << img[i][j][d];
}
}
}
}
};
Unit test:
using namespace std;
const std::vector<vector<int>> gaussian = {{1, 2, 1}, {2, 4, 2}, {1, 2, 1}};
int main() {
std::string filepath = "image.ppm"; // P3 PPM Image with size hardcoded into #define
RGBImage img = RGBImage(filepath);
static RGBImage img001 = img.conv2d(gaussian);
img.write("image_blur.ppm");
return 0;
}
Every method in this code works, however, I am forced to statically allocate every RGBImage that is instantiated: static RGBImage img = RGBImage(filepath);
, and I'm not sure why this is the case. I've looked into using a triple pointer (e.g using new
and ***img
, and also std::array
and std::vector
. There are also solutions simply wrapping up a 1-D data structure into a 3-D structure with a struct
. I'm not sure which approach will work best.
The biggest problems I've found with this code are:
- The width and height have to be typed in as
#defines
, so everyRGBImage
has to have the same size. - Code segfaults if an
RGBImage
is not defined asstatic
. - I'm not sure what the proper conventions are regarding the image editing itself, editing another image, or simply being a method which operates on images. Currently it's a mix of the first two.
Is there a "correct" C++ way to efficiently store the image, with the ability to change its dimensions upon instantiation?
Notes: Using g++ on Ubuntu. Not entirely sure about C++ version, I think over 11?
-
1\$\begingroup\$ Welcome to Code Review! I rolled back your last edit. After getting an answer you are not allowed to change your code anymore. This is to ensure that answers do not get invalidated and have to hit a moving target. If you have changed your code you can either post it as an answer (if it would constitute a code review) or ask a new question with your changed code (linking back to this one as reference). See the section What should I not do? on What should I do when someone answers my question? for more information \$\endgroup\$Sᴀᴍ Onᴇᴌᴀ– Sᴀᴍ Onᴇᴌᴀ ♦2023年02月06日 21:29:35 +00:00Commented Feb 6, 2023 at 21:29
4 Answers 4
General Observations
The first impression I get is that this is a C program rather than a C++ program. The reason for this is the use of #define CONSTANT
rather than using the C++ constexpr
or const
declarations. Another thing that makes this more C like than C++ is the use of C style arrays unsigned int img[RGB_H][RGB_W][RGB_D];
. A third thing that makes this more C like is that C style casts are being used float(*thetas)[RGB_W] = new float[RGB_H][RGB_W]; // matrix
(in C++ this should use a static_cast<>()).
Use of macros for constants in C++ is discouraged because they are not type safe.
static unsigned int constexpr RGB_H = 203; // CHANGE THIS DEPENDING ON THE IMAGE NOTE 1512 before 810
static double constexpr PI = 3.141592653;
Rather than using old style C arrays for the matrix it would be better to use C++ container classes such as std::array or std::vector
(link not provided since the code already uses std::vector
).
The use of memory allocation is also an indication that this is more C like than C++. In modern C++ pointers are avoided as much as possible and when they are used they are smart pointers.
When you compile using g++ you should use some additional flags on the command line to flag possible problems in the code, this flags will provide you with warnings or errors that may prevent bugs:
-Wall -Wextra -pedantic -Werror
Currently I am seeing more than 20 warning messages, primarily 'argument': conversion from 'size_t' to 'int', possible loss of data
. These warnings indicate bugs that are waiting to happen. The first of these warnings is on this line:
G = out.getGrayscale(i, j);
in the function canny()
.
The variable G
is declared as an int
however, it appears that out.getGrayscale()
is returning a size_t
.
Assigning a size_t
value to an int may give you a negative value for the int. If G
can never be negative then it is best to use either an unsigned int
or a size_t
. FYI, an unsigned int
may not be the correct type because size_t
might be defined as unsigned long
depending on your operating system and compiler.
In the future please don't post code that contains commented out code, that means that the code is not ready for review.
For performance reasons it is better to end a line of output with "\n"
rather than std::endl
.
Avoid using namespace std;
If you are coding professionally you probably should get out of the habit of using the using namespace std;
statement. The code will more clearly define where cout
and other identifiers are coming from (std::cin
, std::cout
). As you start using namespaces in your code it is better to identify where each function comes from because there may be function name collisions from different namespaces. The identifiercout
you may override within your own classes, and you may override the operator <<
in your own classes as well. This stack overflow question discusses this in more detail.
File Organization
Unlike C#
and Java
, C++
uses header files for class declarations. Generally C++
header (.h
) files contain only contain declarations and function prototypes, the executable code for the functions are generally in .cpp
source files. One of the reasons for this is to reduce compile times, the function signatures won't change much but the executable code might change a lot to add features or fix bugs. All, or almost all of the bodies of the functions in RGBImage should be moved to a .cpp
file.
This is what the header file may look like (this is using the current code and not other recommendations):
#pragma once
#include <string>
#include <vector>
#define RGB_H 203 // CHANGE THIS DEPENDING ON THE IMAGE NOTE 1512 before 810
#define RGB_W 360 // CHANGE THIS DEPENDING ON THE IMAGE NOTE 2016 before 1440
#define RGB_D 3
#define MAX_COLOR 255
#define R_COEFF 0.2989
#define G_COEFF 0.5870
#define B_COEFF 0.1140
#define PI 3.141592653
class RGBImage {
private:
public:
int height, width;
unsigned int img[RGB_H][RGB_W][RGB_D];
RGBImage();
RGBImage(std::string filepath);
~RGBImage() {}
void write(std::string filepath);
void reduceColors(short colFactor); // reduces the number of colors in each channel by quantizing
void dim(float dimFactor); // makes the image dimmer
void toGrayscale(); // converts self to a grayscale image
void binaryThresh(int threshValue);
// returns index 0 and also zero pads the result
int getGrayscale(int i, int j) {
if (i < 0 || j < 0 || i > RGB_H || i > RGB_W) {
return 0; // zero padding
}
return img[i][j][0];
}
void setGrayscale(int i, int j, unsigned int val) {
img[i][j][0] = val;
img[i][j][1] = val;
img[i][j][2] = val;
}
RGBImage conv2d(std::vector<std::vector<int>> kernel);
RGBImage canny(std::vector<std::vector<int>> sobelx, std::vector<std::vector<int>> sobely, RGBImage& other, int low, int high);
void thinEdges(RGBImage& img);
void combineImgs(const RGBImage& img1, const RGBImage& img2);
};
This allows someone who is using the RGBImage class or someone who is maintaining the class to easily find the public interfaces.
FYI, since there are no private variables or methods the private:
declaration is unnecessary.
It is not necessary to declare a default destructor. C++ will generate one. One only needs to declare destructors when the are following the rule of 3 or the rule of 5.
Complexity
Some measures of complexity are the number of lines in a function or a section of code and the number of levels of indentation. One of the best practices in coding is that a function should be completely viewable in a single scree (on my computer that is 58 lines of code or less).
The canny()
function is 94 lines of code, this is one and a half full screens on most computers. The reason that one screen max per function is a best practice is that it is very difficult for someone reading, writing or maintaining the code to keep track of everything the function is doing. Even with removing the commented out code this function is still too large.
The canny()
function is too complex (does too much).
There is also a programming principle called the Single Responsibility Principle that applies here. The Single Responsibility Principle states:
that every module, class, or function should have responsibility over a single part of the functionality provided by the software, and that responsibility should be entirely encapsulated by that module, class or function.
It appears that canny()
can be broken up into 3 or 4 functions, each outer for loop could be a function.
Magic Numbers
While this is not a major problem in this code, since many constants are defined, there are Magic Numbers in the code (127, and 255). There is a constant defined for 255, but there are 2 or more places where it isn't used there is no constant defined for 127 (is it GRAY?).
Numeric constants in code are sometimes referred to as Magic Numbers, because there is no obvious meaning for them. There is a discussion of this on stackoverflow.
Avoid creating macros
In C++ you almost never have to resort to macros. You can define constants using constexpr
, these can then be used to declare array sizes. For example:
constexpr std::size_t RGB_H = 303;
Macros have lots of problems; two of those are that they don't respect namespaces, and that constants like you defined them don't have a proper type.
Create a separate type for RGB values
Using a three-dimensional array for the image is making things unnecessarily complex. An image has only two dimensions. Consider creating a struct
or class
to represent the RGB values of a pixel:
struct RGB {
std::uint8_t r;
std::uint8_t g;
std::uint8_t b;
};
That way, you can declare the image like so:
RGB img[RGB_H][RGB_W];
There are libraries that provide vector types that might be helpful, for example GLM's glm::vec3
, which also comes with a lot of mathematical functions that can directory work on these types.
Don't hardcode the image size
The biggest problems I've found with this code are:
- The width and height have to be typed in as
#defines
, so everyRGBImage
has to have the same size.
That is obviously very undesirable, so just don't do that. Of course the issue then is that you cannot declare a member variable as an array of which you don't know the size at compile time. The solution in C++ is to use a std::vector
. You are already using those in your code, so you could have created a 2-dimensional vector the way you did for convolution kernels:
std::vector<std::vector<RGB>> image;
However, nested vectors are very inefficient. It's better to declare a one-dimensional array that is large enough to hold all pixels:
std::vector<RGB> image;
Store the width and height in member variables (you already declared width
and height
but you never used those). The size of the vector should of course be width * height
. Then given x
and y
coordinates, you can access a pixel using:
image[x + y * width]
Is there a "correct" C++ way to efficiently store the image, with the ability to change its dimensions upon instantiation?
With a std::vector
, the dimensions can be changed even after instantiation. Using a one-dimensional std::vector
keeps everything tightly packed together in memory, which is usually the most efficient.
Be consistent
I'm not sure what the proper conventions are regarding the image editing itself, editing another image, or simply being a method which operates on images. Currently it's a mix of the first two.
To avoid confusing the user of your class, it would be great if your interface was very consistent. So either have everything modify the image in-place, or have everything return a new image with the original one intact, or possibly provide both options for every operation.
You already have two answers discussing #define
, using namespace std
, compile-time image size, and a bunch of other things. I will avoid repeating those points and discuss some other things:
Documentation: Make sure each function is documented. You'll thank me next year when you don't remember what you meant by
thinEdges()
. :)Formatting: Some functions don't have space around them, they are declared on the very next line after the previous function. Empty lines around functions make the file more readable. I found it hard to find where you defined
conv2d()
, for example.Images are most often stored as 8-bit or 16-bit unsigned integers, or as floating-point values. The former is how they come off the sensor, and how they're shown to screen. The floating-point representation is useful for increased precision in computations.
unsigned int
is a 32-bit unsigned integer, and not terribly useful. If you used (signed)int
, you could store the result of a Sobel or Laplace filter (which produce negative values). But I would have chosenfloat
because it's so much easier (e.g. no overflow, division by 0 errors, ), and it takes up the same amount of space! You could also parameterize the type by turning the class into a template, but that's a bit more advanced.height
andwidth
are public variables. Someone using the class could change those values, making the object inconsistent and causing out-of-bounds memory access later on. Make these variables private, and add a function to read their values.What should be class methods and what should be free functions? I am not a big fan of image processing functions being methods to the image class. I think things are clearer when these are free functions that take an image as input. This would remove one reason for the inconsistency in your function calls. For example,
out = img.canny()
is not as pretty to me asout = canny(img)
. But when multiple images are involved, the problem becomes more obvious. YourcombineImgs(img1, img2)
uses*this
as the output, whereas other functions use*this
as the input. As a free function, it's much more natural to define a function that takes two images as input:out = combineImgs(img1, img2)
.Class methods should be functions that work directly on the object, for example accessors, operators, and functions that modify it (e.g. change the image size).
combineImgs()
adds up two images. Why not make this into an operator? You can declareRGBImage operator+(RGBImage const&, RGBImage const&)
, then you can useout = img1 + img2
. This is (IMO) more expressive thancombineImgs()
, since there are so many different ways to combine images.Actually, on second reading, it looks like
canny()
does not use*this
at all, it takes an input image asother
? Is*this
used? If not, why is this a class method?canny()
takessobelx
andsobely
kernels as input. This would make the function much harder to use: these kernels are fixed and never change, so you are asking the user to always put in the same values here. On the other hand, the original Canny doesn't use Sobel filters, it uses Gaussian derivatives, so you'd expect a sigma parameter for the Gaussian filter to be a parameter tocanny()
.The Canny filter has several stages: derivatives (you've got the
conv2d()
call here), non-maximum suppression, hysteresis threshold, and thinning (I don't see you using the thinning, this is something you could add). All three of these steps are useful outside the Canny filter. If you make these into separate functions, then thecanny()
function will be shorter and easier to read, and you'll have added functionality to your library! For example, this is how I implemented Canny in DIPlib, notice how the largest part of that function is the logic to compute thresholds from the inputs provided by the user.Every time you do a triple loop (over i/x, j/y and d/channel) and apply the same operation to the value inside, you could simply use a single loop.
unsigned int img[RGB_H][RGB_W][RGB_D]
declares a contiguous memory block, you can simply loop over each integer using a single loop.R_COEFF
and the other two values are only used in one place. There is one function that converts RGB to grayscale. Declare these constants in that function, so that (1) it's easy to find their definition when you're reading the function, and (2) it's not as hard to find a name (being a global variable it must be unique within the file, inside the function there's a much smaller scope so name clashes are less likely). This becomes increasingly important as your project grows in size.There's a bug in your code:
if(i < 0 || j < 0 || i > RGB_H || i > RGB_W)
doesn't do the check properly. Ifi == RGB_W
, then you'll be writing out of bounds. It should beif(i < 0 || j < 0 || i >= RGB_H || i >= RGB_W)
. But instead of using the hard-coded constants, you should be using theheight
andwidth
variables here. At some point you'll improve your code to have dynamic image sizes, and it'll be a lot more work then to change the code to use your size variables instead of the constants.Another bug:
getGrayscale()
returns anint
instead of anunsigned int
.
Others have suggested the use of std::vector<>
to store the image data. If you do that, it must be a private member, in the same way that your image sizes must be private members. You'll have a class invariance that is width * height * 3 == img.size()
, and you need to be able to guarantee that this is always true. Making these variables private is the only way you can do that.
With this memory being private, you'll have to add pixel access functions. You already have getGrayscale()
and setGrayscale()
. You could consider, for example, defining unsigned int& operator()(int i, int j, int c)
. This returns a reference to a value in the image, so you can assign to it, image(4, 8, 0) = 5
would be valid. Of course you could also give it a regular function name, unsigned int& getValue(int i, int j, int c)
for example; you'll still be able to assign to it: image.value(4, 8, 0) = 5
.
Be very careful not to define a std::vector<std::vector<uint>>
though. It is hard to initialize, and very inefficient. Just have a std::vector<uint>
, and write your pixel getting/setting functions to compute the index. Outside of your class code you will never know or think about how it's implemented, you will always just use normal indexing. If you make the number of channels a parameter as well, you'll be able to use this class also for gray-scale images, which is what a lot of your code works with anyway.
Though for some advanced algorithms, it is actually really useful to do pointer arithmetic to access a pixel's neighbors. For those algorithms you will want to implement a function that returns a pointer to the first pixel (or to a pixel at a given coordinate), and you will want to have a function that says how many values to skip to get to the next pixel on the left/right (3
in your case) and bottom/top (3 * width
in your case).
It has already been said a lot, but I would like to add a word.
We should put the implementation in a namespace to localize the names we introduce.
At the beginning, it may be easier to represent colors by means of a custom data type, like struct Color
.
It allows us to separate the color and image implementations and vary both independently.
We could store Color
s in a flat buffer instead of a matrix like it is done commonly.
std::vector
is a well established data type.
Using vector
allows us to omit the image destructor and copy/move-constructor definitions because automatically generated versions of the definitions operate per class member, and std::vector
data type has them all implemented.
We may use const
and noexcept
modifiers of class methods more extensively.
A constructor with a single parameter is treated as conversion constructor, e.g., std::string s = "hello";
.
Obviously, we don't intend to convert a string to an image.
To make the intent explicit, supply the constructor with explicit
keyword,
RGBImage img1 = "path"; // error, if the constructor is explicit
RGBImage img2 = RGBImage("path"); // ok
There is no need to call close
on the file stream object.
The destructor does it for us.
But we should verify the file stream state more often to not miss an error.
getGrayscale
member function returns a value of the red channel and can't guarantee that the image is actually grayscale.
It is meant to be used in computationally intensive algorithms and may slower the program execution due to the ubiquitous bounds checking.
Leave the bounds checking responsibility to the calling code.
Moreover, we represent our image as surrounded with the infinite black area. It may cause a lot of subtle computation errors, which in the end can only be determined visually.
The following code is a box filter implementation example.
It depends on the header-only cimg library for quick visualization of the results.
The header file resides in the github repository.
I couldn't make it work with jpg images, probably due to the missing ImageMagick, but it works fine with ppm
format.
For now, we must hardcode a path to the image.
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <CImg.h>
#include <algorithm>
namespace myimglib
{
static constexpr double pi = 3.14159265358979323846;
static constexpr float eps = 1e-6;
struct Color {
Color& operator=(float value) noexcept {
r = g = b = value;
return *this;
}
Color& operator*=(float k) noexcept {
r *= k;
g *= k;
b *= k;
return *this;
}
Color& operator/=(float k) noexcept {
r /= k;
g /= k;
b /= k;
return *this;
}
Color& operator+=(const Color& rhs) noexcept {
r += rhs.r;
g += rhs.g;
b += rhs.b;
return *this;
}
Color& operator-=(const Color& rhs) noexcept {
r -= rhs.r;
g -= rhs.g;
b -= rhs.b;
return *this;
}
friend Color operator/(Color lhs, float k) noexcept {
return lhs /= k;
}
friend Color operator*(float k, Color rhs) noexcept {
return rhs *= k;
}
friend Color operator+(Color lhs, const Color& rhs) noexcept {
return lhs += rhs;
}
friend Color operator-(Color lhs, const Color& rhs) noexcept {
return lhs -= rhs;
}
void clamp() {
r = (r < eps) ? 0 : (r > 1 + eps) ? 1 : r;
g = (g < eps) ? 0 : (g > 1 + eps) ? 1 : g;
b = (b < eps) ? 0 : (b > 1 + eps) ? 1 : b;
}
float r;
float g;
float b;
};
class RGBImage {
public:
RGBImage(std::ptrdiff_t height = 0, std::ptrdiff_t width = 0)
: height_(height), width_(width),
data_(height_ * width_, { 0, 0, 0 })
{ }
explicit RGBImage(const std::string& filePath);
auto height() const noexcept {
return height_;
}
auto width() const noexcept {
return width_;
}
const Color& operator()(std::ptrdiff_t x, std::ptrdiff_t y) const noexcept {
return data_[y * width_ + x];
}
Color& operator()(std::ptrdiff_t x, std::ptrdiff_t y) noexcept {
return data_[y * width_ + x];
}
friend RGBImage readImage(const std::string& filePath);
friend bool writeImage(const RGBImage& image, const std::string& filePath);
void dim(float factor) noexcept {
std::for_each(std::begin(data_), std::end(data_), [factor](Color& c) {
c *= factor;
});
}
void grayscale(float rk, float gk, float bk) noexcept {
std::for_each(std::begin(data_), std::end(data_), [rk, gk, bk](Color& c) {
c = rk * c.r + gk * c.g + bk * c.b;
});
}
void binaryThresh(float threshold) noexcept {
std::for_each(std::begin(data_), std::end(data_), [threshold](Color& c) {
c.r = (c.r > threshold + eps) ? 1 : 0;
c.g = (c.g > threshold + eps) ? 1 : 0;
c.b = (c.b > threshold + eps) ? 1 : 0;
});
}
RGBImage conv2d(const std::vector<std::vector<int>>& kernel) const;
private:
std::ptrdiff_t height_;
std::ptrdiff_t width_;
std::vector<Color> data_;
};
RGBImage readImage(const std::string& filePath) {
std::ifstream file(filePath, std::ios::binary);
if (!file) {
std::cerr << "Failed to open file " << filePath << '\n';
return {};
}
RGBImage image;
if (!file.read(reinterpret_cast<char*>(&image.height_), sizeof(image.height_))
.read(reinterpret_cast<char*>(&image.width_), sizeof(image.width_))) {
std::cerr << "Failed to read image dimensions\n";
return {};
}
auto n = image.height_ * image.width_;
image.data_.resize(n);
if (!file.read(reinterpret_cast<char*>(image.data_.data()), n * sizeof(Color))) {
std::cerr << "Failed to read image content\n";
return {};
}
return image;
}
RGBImage::RGBImage(const std::string& filePath)
: RGBImage(readImage(filePath))
{ }
bool writeImage(const RGBImage& image, const std::string& filePath) {
std::ofstream file(filePath, std::ios::binary);
if (!file) {
std::cerr << "Failed to open file " << filePath << '\n';
return false;
}
if (!file.write(reinterpret_cast<const char*>(&image.height_), sizeof(image.height_))
.write(reinterpret_cast<const char*>(&image.width_), sizeof(image.width_))) {
std::cerr << "Failed to write image dimensions\n";
return false;
}
if (!file.write(reinterpret_cast<const char*>(image.data_.data()),
image.data_.size() * sizeof(Color))) {
std::cerr << "Failed to write image content\n";
return false;
}
return true;
}
RGBImage RGBImage::conv2d(const std::vector<std::vector<int>>& kernel) const {
using std::ptrdiff_t;
int ksum = 0;
for (const auto& row : kernel)
for (const int value : row)
ksum += value;
if (ksum <= 0)
ksum = 1;
RGBImage out(height_, width_);
const ptrdiff_t kheight = kernel.size();
const ptrdiff_t hmid = kheight / 2;
const ptrdiff_t kwidth = kernel[0].size();
const ptrdiff_t wmid = kwidth / 2;
for (ptrdiff_t oy = 0; oy < height_; ++oy) {
for (ptrdiff_t ox = 0; ox < width_; ++ox) {
Color outColor{ 0, 0, 0 };
for (ptrdiff_t i = 0, y = oy - hmid; i < kheight && y <= oy + hmid && y < height_; ++i, ++y) {
if (y < 0)
continue;
for (ptrdiff_t j = 0, x = ox - wmid; j < kwidth && x <= ox + wmid && x < width_; ++j, ++x) {
if (x >= 0)
outColor += kernel[i][j] * (*this)(x, y);
}
}
out(ox, oy) = outColor / ksum;
}
}
return out;
}
} // myimglib
void boxblur(cimg_library::CImg<unsigned char>& img, int kernelSize) {
myimglib::RGBImage myimage(img.height(), img.width());
for (int y = 0; y < img.height(); ++y) {
for (int x = 0; x < img.width(); ++x) {
myimage(x, y) = {
float(img(x, y, 0)) / 255,
float(img(x, y, 1)) / 255,
float(img(x, y, 2)) / 255
};
}
}
std::vector<std::vector<int>> kernel(kernelSize, std::vector<int>(kernelSize, 1));
auto modified = myimage.conv2d(kernel);
for (int y = 0; y < img.height(); ++y) {
for (int x = 0; x < img.width(); ++x) {
img(x, y, 0) = modified(x, y).r * 255;
img(x, y, 1) = modified(x, y).g * 255;
img(x, y, 2) = modified(x, y).b * 255;
}
}
}
int main()
{
using namespace cimg_library;
const char* imagePath = "YOUR PATH TO IMAGE";
CImg<unsigned char> image(imagePath), visu(500, 400, 1, 3, 0);
boxblur(image, 9);
const unsigned char red[] = { 255,0,0 }, green[] = { 0,255,0 }, blue[] = { 0,0,255 };
CImgDisplay main_disp(image, "Click a point"), draw_disp(visu, "Intensity profile");
while (!main_disp.is_closed() && !draw_disp.is_closed()) {
main_disp.wait();
if (main_disp.button() && main_disp.mouse_y() >= 0) {
const int y = main_disp.mouse_y();
visu.fill(0).draw_graph(image.get_crop(0, y, 0, 0, image.width() - 1, y, 0, 0), red, 1, 1, 0, 255, 0);
visu.draw_graph(image.get_crop(0, y, 0, 1, image.width() - 1, y, 0, 1), green, 1, 1, 0, 255, 0);
visu.draw_graph(image.get_crop(0, y, 0, 2, image.width() - 1, y, 0, 2), blue, 1, 1, 0, 255, 0).display(draw_disp);
}
}
return 0;
}
-
1\$\begingroup\$ Some good points. But
RGBImage img2 = RGBImage("path");
is better written asRGBImage img2("path");
because it’s less redundant. Also, I would suggest you usestd::size_t
for image sizes, notstd::ptrdiff_t
, because sizes cannot be negative. \$\endgroup\$Cris Luengo– Cris Luengo2023年02月12日 16:05:51 +00:00Commented Feb 12, 2023 at 16:05 -
\$\begingroup\$ @CrisLuengo I used
size_t
at first, but ended up fixing subtle mixed signed/unsigned arithmetic bugs in the conv2d function. The freedom of not bothering about mixed arithmetic complications seems more important to me when implementing complex algorithms. Could you share your experience on this? \$\endgroup\$panik– panik2023年02月13日 12:22:08 +00:00Commented Feb 13, 2023 at 12:22 -
1\$\begingroup\$ I turn on warnings for implicit casts, and use explicit casts everywhere. It ensures I know what types are being combined and allows me to verify the casts make sense. The
size
of containers in the standard lib is an unsigned integer. Doingfor(std::ptrdiff_t ii=0; ii<img.size(); ++ii)
usually leads to a warning about comparing values of different signedness. So using unsigned integers sometimes is easier. But there always are places where signed and unsigned integers collide, and it’s annoying. \$\endgroup\$Cris Luengo– Cris Luengo2023年02月13日 14:46:45 +00:00Commented Feb 13, 2023 at 14:46