基于SSE实现图像二值化
SSE介绍及使用可见:https://blog.csdn.net/thisiszdy/article/details/132512686
本文使用SSE指令集来实现图像二值化算法,同时对比OpenCV二值化算子及for循环求解二值化的效果及性能。
// opencvTest.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include <emmintrin.h>

#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

#include <opencv2/imgcodecs.hpp>
#include "opencv2/core.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc.hpp"
using namespace cv;
using namespace std;
int main()
{
clock_t start, end;
const char* imageName = "test.bmp";
cv::Mat src = imread(imageName, IMREAD_GRAYSCALE);
//OpenCV二值化
start = clock();
cv::Mat dst;
cv::threshold(src, dst, 50, 255, cv::THRESH_BINARY);
end = clock();
double time_cost = (double)(end - start);
std::cout << "OpenCV Threshold耗时:" << time_cost << std::endl;
//for循环二值化
start = clock();
cv::Mat dstfor(src.rows, src.cols, CV_8UC1);
uchar* ptrSrc = src.data;
uchar* ptrdst = dstfor.data;
int lenght = src.rows * src.cols * src.channels();
#pragma omp parallel
for (int i = 0;i < lenght;i++)
{
if (*(ptrSrc + i) <= 50)
{
*(ptrdst + i) = 0;
}
else
{
*(ptrdst + i) = 255;
}
}
end = clock();
double time_cost1 = (double)(end - start);
std::cout << "for Threshold耗时:" << time_cost1 << std::endl;
//SSE加速二值化
start = clock();
uchar thre = 50;
uchar maxV = 255;
cv::Mat dts3(src.rows, src.cols, CV_8UC1);
uchar* ptrSrc1 = src.ptr();
uchar* ptrdst1 = dts3.data;
size_t srcstep = src.step;
__m128i smask = _mm_set1_epi8((char)128);
__m128i mThre = _mm_set1_epi8(50);
#pragma omp parallel
for (size_t i = 0; i < src.rows; i++, ptrSrc1 += srcstep, ptrdst1 += srcstep)
{
for (size_t j = 0; j <= src.cols * src.channels() - 16; j += 16)
{
__m128i mValue = _mm_loadu_si128((__m128i*)(ptrSrc1 + j)); //加载内存到寄存器
auto sub1 = _mm_sub_epi8(mValue, smask); //无符号0~255范围转为有符号-128~127范围
auto sub2 = _mm_sub_epi8(mThre, smask);
mValue = _mm_cmpgt_epi8(sub1, sub2); //与阈值比较,大于阈值返回255,小于阈值返回0
_mm_storeu_si128((__m128i*)(ptrdst1 + j), mValue); //寄存器加载到内存
}
}
end = clock();
double time_cost2 = (double)(end - start);
std::cout << "SSE Threshold耗时:" << time_cost2 << std::endl;
// 效果比较
Mat findzero1 = dst != dstfor; //OpenCV和for循环比较效果
Mat findzero2 = dst != dts3; //Opencv和SEE指针比较效果
vector<cv::Point> veczero1;
vector<cv::Point> veczero2;
cv::findNonZero(findzero1, veczero1);
cv::findNonZero(findzero2, veczero2);
int num1 = veczero1.size();
int num2 = veczero2.size();
std::cout << "OpenCV和for循环两种方法逐像素比较,像素不同个数:" << num1 << std::endl;
std::cout << "Opencv和SEE指针两种方法逐像素比较,像素不同个数:" << num2 << std::endl;
system("pause");
return 0;
}
运算结果显示,SSE指令集加速阈值分割更快,检测效果与OpenCV一致。