I implemented the Sobel algorithm sequentially, and got decent performance (1.49 s) but wanted to improve it. I parallelized it with OpenMP and got a ~3x speedup (0.523 s). I'd like to do better, but I'm not sure how.
// ------- C/C++ includes ------
#include <iostream>
#include <stdio.h>
#include <omp.h>
#include <time.h>
// ------ OpenCV includes ------
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
// dimension of kernel
int x[3][3];
int y[3][3];
/*----- OpenMP -----*/
int num_of_threads, i ,j;
double start, end;
int main( int argc, char** argv )
{
Mat initialImage = imread(argv[1], 0); // imread gray-scale image
Mat finalImage = Mat::zeros(initialImage.size(), initialImage.type());
if (finalImage.type() == initialImage.type() )
{
cout << "YES" << endl;
}
if(argc != 2 || !initialImage.data)
{
cout << "No image data or Usage: ./sobel imagePath" << endl;
return -1;
}
else
cout << "Image OK!" << endl;
//x direction
x[0][0] = -1; x[0][1] = 0; x[0][2] = 1;
x[1][0] = -2; x[1][1] = 0; x[1][2] = 2;
x[2][0] = -1; x[2][1] = 0; x[2][2] = 1;
//y direction
y[0][0] = -1; y[0][1] = -2; y[0][2] = -1;
y[1][0] = 0; y[1][1] = 0; y[1][2] = 0;
y[2][0] = 1; y[2][1] = 2; y[2][2] = 1;
num_of_threads = 8;//omp_get_num_procs();
omp_set_num_threads(num_of_threads);
start = omp_get_wtime();
for(j = 0; j < initialImage.rows - 2; j++ ){
#pragma omp parallel for private(i)
for(i = 0; i < initialImage.cols -2; i++ ){
// applay karnel in x direction
int xValOfPixel =
(x[0][0] * (int)initialImage.at<uchar>(j, i )) + (x[0][1] * (int)initialImage.at<uchar>(j + 1, i )) + (x[0][2] * (int)initialImage.at<uchar>(j + 2, i )) +
(x[1][0] * (int)initialImage.at<uchar>(j, i + 1)) + (x[1][1] * (int)initialImage.at<uchar>(j + 1, i + 1)) + (x[1][2] * (int)initialImage.at<uchar>(j + 2, i + 1)) +
(x[2][0] * (int)initialImage.at<uchar>(j, i + 2)) + (x[2][1] * (int)initialImage.at<uchar>(j + 1, i + 2)) + (x[2][2] * (int)initialImage.at<uchar>(j + 2, i + 2));
// apply karnel in y direction
int yValOfPixel =
(y[0][0] * (int)finalImage.at<uchar>(j, i )) + (y[0][1] * (int)finalImage.at<uchar>(j + 1, i )) + (y[0][2] * (int)finalImage.at<uchar>(j + 2, i )) +
(y[1][0] * (int)finalImage.at<uchar>(j, i + 1)) + (y[1][1] * (int)finalImage.at<uchar>(j + 1, i + 1)) + (y[1][2] * (int)finalImage.at<uchar>(j + 2, i + 1)) +
(y[2][0] * (int)finalImage.at<uchar>(j, i + 2)) + (y[2][1] * (int)finalImage.at<uchar>(j + 1, i + 2)) + (y[2][2] * (int)finalImage.at<uchar>(j + 2, i + 2));
int sum = abs(xValOfPixel) + abs(yValOfPixel);
if(sum > 255)
sum = 255;
finalImage.at<uchar>(j, i) = (uchar)sum;
}
}
end = omp_get_wtime();
cout << "Time: " << end - start << endl;
// display the images
namedWindow("Initial Image", WINDOW_AUTOSIZE);
namedWindow("Final Image", WINDOW_AUTOSIZE);
imshow("Initial Image",initialImage);
imshow("Final Image",finalImage);
waitKey(0);
return 0;
}
1 Answer 1
Avoid using namespace
Especially for large namespaces such as std
, bringing so many identifiers into scope is dangerous. Prefer to import just the identifiers you need, and/or qualify at use; as it is, I get a compilation failure:
195834.cpp: In function ‘int main(int, char**)’:
195834.cpp:81:5: error: reference to ‘end’ is ambiguous
end = omp_get_wtime();
^~~
Don't include <stdio.h>
and <time.h>
.
When writing new code, prefer <cstdio>
and <ctime>
, which brings the names into the std
namespace. Even better, omit these includes entirely, as we don't need them.
And do include <cmath>
, so we have a declaration for std::abs()
.
Don't put everything in main()
There's three parts to this program. It's easier to follow if we split the input and output from the processing.
Avoid global variables
Part of the problem with end
above is that it's declared global when it has no right to be. Keep the scope of your variables as small as reasonably possible (and give them more meaningful names!). Similarly, the kernels can be declared within the function:
static const int x[3][3] = { { -1, 0, 1 }, { -2, 0, 2 }, { -1, 0, 1 }};
static const int y[3][3] = { { -1, -2, -1 }, { 0, 0, 0 }, { 1, 2, 1 }};
Use the correct output streams
Use std::cerr
for reporting errors, and std::clog
for other diagnostics.
Check argv[1]
is valid before using it.
We have
Mat initialImage = imread(argv[1], 0); // DANGER
// ...
if(argc != 2 || !initialImage.data)
We hit undefined behaviour if we access argv[1]
before discovering that argc < 2
.
Use the correct image for the y-gradient
We should be using initialImage
, not finalImage
to compute yValOfPixel
, otherwise we'll always get zero.
Use a bigger work unit
You'll get better parallelisation if you make the outer loop a parallel-for, rather than the inner loop. Since each row is the same length, you're better off dividing the work into bigger chunks
Hard-code the Sobel coefficients
Since this isn't a general kernel convolution, we can help the compiler by using the constants in-place. This allows the multiplication by small constants to be replaced by addition or subtraction (or omitted entirely for the six 0s).
Use standard functions to limit values
Instead of the ad-hoc test for sum
exceeding 255 (and no test for below zero), we can use std::clamp()
(from <algorithm>
).
Modified code
#include <algorithm>
#include <cmath>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
cv::Mat sobel_transform(const cv::Mat& input)
{
cv::Mat finalImage = cv::Mat::zeros(input.size(), input.type());
#pragma omp parallel for
for (int j = 0; j < input.rows-2; ++j) {
for (int i = 0; i < input.cols-2; ++i) {
// applay karnel in x direction
int xValOfPixel =
- (int)input.at<uchar>(j, i ) + (int)input.at<uchar>(j + 2, i )
- 2 * (int)input.at<uchar>(j, i + 1) + 2 * (int)input.at<uchar>(j + 2, i + 1)
- (int)input.at<uchar>(j, i + 2) + (int)input.at<uchar>(j + 2, i + 2);
// apply karnel in y direction
int yValOfPixel =
- (int)input.at<uchar>(j, i ) - 2 * (int)input.at<uchar>(j + 1, i ) - (int)input.at<uchar>(j + 2, i )
+ (int)input.at<uchar>(j, i + 2) + 2 * (int)input.at<uchar>(j + 1, i + 2) + (int)input.at<uchar>(j + 2, i + 2);
int sum = std::clamp(std::abs(xValOfPixel) + std::abs(yValOfPixel), 0, 255);
finalImage.at<uchar>(j, i) = (uchar)sum;
}
}
return finalImage;
}
#include <omp.h>
#include <iostream>
int main(int argc, char** argv)
{
if (argc != 2) {
std::cerr << "Usage: ./sobel imagePath" << std::endl;
return 1;
}
const cv::Mat initialImage = cv::imread(argv[1], cv::IMREAD_GRAYSCALE);
if (!initialImage.data) {
std::cerr << "Failed to read image" << std::endl;
return 1;
}
double start_time = omp_get_wtime();
const cv::Mat finalImage = sobel_transform(initialImage);
double end_time = omp_get_wtime();
std::clog << "Time: " << end_time - start_time << std::endl;
// display the images
cv::namedWindow("Initial Image", cv::WINDOW_AUTOSIZE);
cv::namedWindow("Final Image", cv::WINDOW_AUTOSIZE);
cv::imshow("Initial Image", initialImage);
cv::imshow("Final Image", finalImage);
cv::waitKey(0);
}
-
\$\begingroup\$ I'm assuming you didn't want an answer that simply said "use
cv::Sobel
instead"... \$\endgroup\$Toby Speight– Toby Speight2018年06月05日 13:32:18 +00:00Commented Jun 5, 2018 at 13:32
omp_set_num_threads()
? \$\endgroup\$