最近碰到一个检验设备是生成PDF文件报告的。imedicallis监听程序把PDF解析出来之后发现PDF里面图不是多个小图,而是一张大图。但用户又要传到检验系统的是小图,而且小图位置和数量不固定,也不能用固定位置截取实现。为此开启一段“高端设备局”之旅。以前咱取过图、还原过图、按数据和约定画过图,从来没按边框切过图。
仪器源PDF如下,里面是一张大图,监听已经可以得到一页页的大图了。
现在需要把他的小图边界分析出来并切割。我的思路是:
1.先把图片二值化,处理成纯黑白图,然后得到由0和1组成的二维数组。
2.遍历二维数组找到所有长度大于指定值的线段。
3.遍历线段得到所有矩形。
4.去除面积重叠率超过百分之九十的重复矩形(代码中阈值取0.92)。
5.在矩形基础上,每次把每条边外扩指定像素,然后检查二维数组里外扩后的矩形边上有没有像素。从一开始没像素或有像素,外扩到边上出现过像素后再次变为无像素,即到了文字边界,这里就是每个小图的边界。
6.按外扩的边框切割得到目标图片。
1.原始图
2.二值化得到黑白图
3.从二值化图得到二维数组
4.遍历数组找到所有目标线段并且画出来看对不对
5.从所有线段算出所有矩形,并且画出来看对不对
6.把矩形进行面积叠加计算去除重复的,然后画剩余的矩形看对不对
7.按矩形外扩法推断得到文字边界,并且把外扩矩形画出来看对不对
8.按外扩矩形切割得到小图
至此就实现了PDF解析的整个大图切割得到小图了。C#实现算法逻辑如下:
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
namespace ImageCutTest
{
public class RectCutImageUtil
{
/// <summary>
/// Cuts one large page image into the framed sub-images it contains, using the frame borders as guides.
/// Debug artifacts for every stage are written next to the input, using <paramref name="path"/> as a prefix
/// ("step1.bmp", "step2.txt", "step3.bmp" ... "step6.bmp"), followed by one "-imedicalliscut-N.bmp" file
/// per detected sub-image.
/// </summary>
/// <param name="path">Path of the page image to load; also the prefix of every output file.</param>
/// <exception cref="ArgumentException">The file did not load as a raster <see cref="Bitmap"/>.</exception>
public static void CutImage(string path)
{
    // GDI+ images hold native handles (and a lock on the source file), so release them deterministically.
    using (Image img = Image.FromFile(path))
    {
        Bitmap sourceBmp = img as Bitmap;
        if (sourceBmp == null)
        {
            throw new ArgumentException("The file is not a raster image that can be cut: " + path, "path");
        }

        // Step 1: reduce the page to pure black / white.
        using (Bitmap step1bmp = ConvertToBipolar(sourceBmp))
        {
            step1bmp.Save(path + "step1.bmp");

            // Step 2: turn the black / white image into a 0/1 grid and dump it for inspection.
            List<List<int>> numList = GetNumberMap(step1bmp);
            OutPutValueToTxt(numList, path + "step2.txt");

            // Step 3: collect every straight run longer than 80 pixels and draw the result.
            List<LineCls> lineList = GetAllLine(numList, 80);
            ShowLine(step1bmp, lineList, path + "step3.bmp");

            // Step 4: combine the lines into rectangles (minimum edge 60 pixels) and draw them.
            List<Rectangle> recList = GetRectangle(lineList, 60);
            ShowRectangle(step1bmp, recList, path + "step4.bmp");

            // Step 5: drop rectangles that almost completely overlap an earlier one, then draw the survivors.
            DeleteOverlapRect(recList);
            ShowRectangle(step1bmp, recList, path + "step5.bmp");

            // Step 6: grow every rectangle outwards until it encloses the surrounding text, then draw it.
            List<Rectangle> newRectList = OuterToChar(recList, numList, 8);
            ShowRectangle(step1bmp, newRectList, path + "step6.bmp");

            // Final step: crop the grown rectangles out of the original (not the black / white) image.
            CutRectImage(sourceBmp, newRectList, path);
        }
    }
}
/// <summary>
/// Crops every rectangle of <paramref name="recList"/> out of <paramref name="source"/> and saves each crop
/// as "<paramref name="oldFilePath"/>-imedicalliscut-N.bmp", where N is the rectangle's index in the list.
/// Parts of a rectangle that fall outside the source image are left white.
/// </summary>
/// <param name="source">Image to crop from.</param>
/// <param name="recList">Regions to crop, in pixel coordinates of <paramref name="source"/>.</param>
/// <param name="oldFilePath">Path used as the prefix of the generated file names.</param>
private static void CutRectImage(Bitmap source, List<Rectangle> recList, string oldFilePath)
{
    Rectangle sourceBounds = new Rectangle(0, 0, source.Width, source.Height);
    for (int i = 0; i < recList.Count; i++)
    {
        Rectangle rect = recList[i];
        // Bitmap and Graphics wrap native GDI+ handles; dispose them per crop instead of leaking one pair per loop.
        using (Bitmap bmp = new Bitmap(rect.Width, rect.Height))
        using (Graphics g = Graphics.FromImage(bmp))
        {
            g.Clear(Color.White);

            // Copy pixel-for-pixel. DrawImage(image, x, y) draws at the image's physical size, which rescales
            // the picture when the source resolution differs from the new bitmap's, so the crop would no
            // longer line up with rectangles that were computed in pixels.
            Rectangle visible = Rectangle.Intersect(rect, sourceBounds);
            if (visible.Width > 0 && visible.Height > 0)
            {
                Rectangle target = new Rectangle(visible.X - rect.X, visible.Y - rect.Y, visible.Width, visible.Height);
                g.DrawImage(source, target, visible, GraphicsUnit.Pixel);
            }

            // NOTE(review): Save(string) does not pick the encoder from the ".bmp" extension - confirm the
            // consumer accepts the format that is actually written.
            bmp.Save(oldFilePath + "-imedicalliscut-" + i + ".bmp");
        }
    }
}
/// <summary>
/// Grows every rectangle outwards step by step until its border has passed the text that surrounds the frame,
/// and returns the grown rectangles (one per input rectangle, in the same order).
/// </summary>
/// <param name="recList">Frame rectangles to grow.</param>
/// <param name="numList">0/1 pixel grid of the page, indexed [row][column].</param>
/// <param name="maxOuter">Exclusive upper bound for the growth step; steps 1 .. maxOuter - 1 are tried.</param>
/// <returns>The grown rectangles.</returns>
private static List<Rectangle> OuterToChar(List<Rectangle> recList, List<List<int>> numList, int maxOuter)
{
    List<Rectangle> grown = new List<Rectangle>();
    foreach (Rectangle box in recList)
    {
        int growTop = 0;
        int growRight = 0;
        int growBottom = 0;
        int growLeft = 0;
        // Becomes true once a grown border has touched at least one set pixel.
        bool touchedInk = false;

        int step = 1;
        while (step < maxOuter)
        {
            // Each side grows at its own rate per step: top 3, right 1, bottom 5, left 5 pixels.
            growTop = step * 3;
            growRight = step;
            growBottom = step * 5;
            growLeft = step * 5;

            if (OuterToRectHasPixel(box, numList, growTop, growRight, growBottom, growLeft))
            {
                touchedInk = true;
            }
            else if (touchedInk)
            {
                // The border was on ink before and is clear again: the text boundary has been passed.
                break;
            }
            step++;
        }

        // Whatever growth was reached last (0 when no step ran) defines the result.
        grown.Add(new Rectangle(
            box.Left - growLeft,
            box.Top - growTop,
            box.Width + growLeft + growRight,
            box.Height + growTop + growBottom));
    }
    return grown;
}
/// <summary>
/// Grows <paramref name="rect"/> by the given amounts and reports whether any set pixel (value 1) lies on one
/// of the four edges of the grown rectangle. Returns false when the grown rectangle does not lie strictly
/// inside the grid.
/// </summary>
/// <param name="rect">Rectangle to grow.</param>
/// <param name="numList">0/1 pixel grid, indexed [row][column].</param>
/// <param name="topAdd">Pixels added above.</param>
/// <param name="rightAdd">Pixels added to the right.</param>
/// <param name="buttomAdd">Pixels added below.</param>
/// <param name="leftAdd">Pixels added to the left.</param>
/// <returns>True when at least one edge pixel of the grown rectangle is set; otherwise false.</returns>
private static bool OuterToRectHasPixel(Rectangle rect, List<List<int>> numList, int topAdd, int rightAdd, int buttomAdd, int leftAdd)
{
    int minX = rect.Left - leftAdd;
    int minY = rect.Top - topAdd;
    int maxX = minX + (rect.Width + leftAdd + rightAdd);
    int maxY = minY + (rect.Height + topAdd + buttomAdd);

    // Only inspect the border while the grown rectangle is still inside the grid.
    bool insideGrid = minX > 0 && minY > 0 && maxX < numList[0].Count && maxY < numList.Count;
    if (!insideGrid)
    {
        return false;
    }

    // Top and bottom edges.
    for (int x = minX; x <= maxX; x++)
    {
        if (numList[minY][x] == 1 || numList[maxY][x] == 1)
        {
            return true;
        }
    }

    // Left and right edges.
    for (int y = minY; y <= maxY; y++)
    {
        if (numList[y][minX] == 1 || numList[y][maxX] == 1)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Removes near-duplicate rectangles in place: for every rectangle, each later rectangle whose overlap rate
/// with it exceeds 0.92 is deleted. The relative order of the remaining rectangles is kept.
/// </summary>
/// <param name="recList">List to clean up; modified in place.</param>
private static void DeleteOverlapRect(List<Rectangle> recList)
{
    int keep = 0;
    while (keep < recList.Count - 1)
    {
        Rectangle reference = recList[keep];
        int candidate = keep + 1;
        while (candidate < recList.Count)
        {
            Rectangle other = recList[candidate];
            float rate = OverlapRate(
                reference.X, reference.Y, reference.Height, reference.Width,
                other.X, other.Y, other.Height, other.Width);
            if (rate > 0.92)
            {
                // The next element slides into this slot, so the index must not advance.
                recList.RemoveAt(candidate);
            }
            else
            {
                candidate++;
            }
        }
        keep++;
    }
}
/// <summary>
/// Computes how much two axis-aligned rectangles overlap, as intersection area divided by union area.
/// (x, y) is the corner with the smallest coordinates of each rectangle (callers pass Rectangle.X / Rectangle.Y).
/// </summary>
/// <param name="x1">X of the first rectangle.</param>
/// <param name="y1">Y of the first rectangle.</param>
/// <param name="h1">Height of the first rectangle.</param>
/// <param name="w1">Width of the first rectangle.</param>
/// <param name="x2">X of the second rectangle.</param>
/// <param name="y2">Y of the second rectangle.</param>
/// <param name="h2">Height of the second rectangle.</param>
/// <param name="w2">Width of the second rectangle.</param>
/// <returns>Intersection over union; 0 when the rectangles do not overlap.</returns>
private static float OverlapRate(int x1, int y1, int h1, int w1, int x2, int y2, int h2, int w2)
{
    // Extent of the intersection along each axis: smallest far edge minus largest near edge.
    int sharedWidth = Math.Min(x1 + w1, x2 + w2) - Math.Max(x1, x2);
    int sharedHeight = Math.Min(y1 + h1, y2 + h2) - Math.Max(y1, y2);

    // A non-positive extent on either axis means the rectangles do not overlap.
    if (sharedWidth <= 0 || sharedHeight <= 0)
    {
        return 0f;
    }

    int sharedArea = sharedWidth * sharedHeight;
    int unionArea = w1 * h1 + w2 * h2 - sharedArea;
    return (float)sharedArea / unionArea;
}
/// <summary>
/// Debug helper: draws the given rectangles in orange over a copy of <paramref name="source"/> and saves
/// the result to <paramref name="savepath"/>. The source image itself is not modified.
/// </summary>
/// <param name="source">Background image (the black / white page).</param>
/// <param name="recList">Rectangles to outline.</param>
/// <param name="savepath">File the annotated image is saved to.</param>
private static void ShowRectangle(Bitmap source, List<Rectangle> recList, string savepath)
{
    // Bitmap, Graphics and Pen all wrap native GDI+ handles; release them as soon as the file is written.
    using (Bitmap bmp = new Bitmap(source.Width, source.Height))
    using (Graphics g = Graphics.FromImage(bmp))
    using (Pen pen = new Pen(Color.Orange, 1f))
    {
        g.Clear(Color.White);
        g.DrawImage(source, 0, 0);
        foreach (Rectangle t in recList)
        {
            g.DrawRectangle(pen, t);
        }
        bmp.Save(savepath);
    }
}
/// <summary>
/// Builds rectangles out of detected lines. Every horizontal line is tried as a top edge: a left and a right
/// vertical line hanging from its two ends are looked up, then a horizontal line closing the bottom.
/// Lines that end up in a rectangle are flagged through IsUse.
/// </summary>
/// <param name="lineList">All detected lines; Drict == 0 is treated as horizontal, anything else as vertical.</param>
/// <param name="length">Minimum size: a side line must reach more than this far below the top line, and a
/// rectangle narrower or lower than this is discarded.</param>
/// <returns>The rectangles found, in discovery order.</returns>
private static List<Rectangle> GetRectangle(List<LineCls> lineList, int length)
{
List<Rectangle> ret = new List<Rectangle>();
// Horizontal lines
List<LineCls> lineList0 = new List<LineCls>();
// Vertical lines
List<LineCls> lineList1 = new List<LineCls>();
// Split the input into horizontal and vertical lines
for (int i = 0; i < lineList.Count; i++)
{
if (lineList[i].Drict == 0)
{
lineList0.Add(lineList[i]);
}
else
{
lineList1.Add(lineList[i]);
}
}
// Tolerance in pixels when deciding whether two line ends meet
int flaotNum = 20;
// Try every horizontal line as the top edge of a rectangle
// NOTE(review): the IsUse flag of the top candidate itself is not checked here - confirm that is intended
for (int i = 0; i < lineList0.Count; i++)
{
// Left vertical edge
LineCls left = null;
// Right vertical edge
LineCls right = null;
// Look for two unused vertical lines that start at the ends of the top line and extend below it
for (int j = 0; j < lineList1.Count; j++)
{
if (lineList1[j].IsUse == true)
{
continue;
}
// Left edge: reaches more than `length` below the top line and starts near its start point.
// There is no break, so the last matching line wins.
if (lineList1[j].EndY - lineList0[i].EndY > length && Math.Abs(lineList1[j].StartY - lineList0[i].StartY) < flaotNum && Math.Abs(lineList1[j].StartX - lineList0[i].StartX) < flaotNum)
{
left = lineList1[j];
}
// Right edge: same test, but against the end point (EndX) of the top line
if (lineList1[j].EndY - lineList0[i].EndY > length && Math.Abs(lineList1[j].StartY - lineList0[i].StartY) < flaotNum && Math.Abs(lineList1[j].StartX - lineList0[i].EndX) < flaotNum)
{
right = lineList1[j];
}
}
// Both side edges found: now look for the bottom edge
if (left != null && right != null)
{
for (int k = 0; k < lineList0.Count; k++)
{
if (lineList0[k].IsUse == true)
{
continue;
}
// Same horizontal extent as the top line
if (Math.Abs(lineList0[k].StartX - lineList0[i].StartX) < flaotNum && Math.Abs(lineList0[k].EndX - lineList0[i].EndX) < flaotNum)
{
// Lies at the lower ends of both side edges, i.e. closes the box
if (Math.Abs(lineList0[k].StartY - left.EndY) < flaotNum && Math.Abs(lineList0[k].EndY - right.EndY) < flaotNum)
{
Rectangle rec = new Rectangle(left.StartX, lineList0[i].StartY, right.StartX - left.StartX, lineList0[k].StartY - lineList0[i].StartY);
// Too small to be a frame: skip it without marking any line as used
if (rec.Width < length || rec.Height < length)
{
continue;
}
lineList0[i].IsUse = true;
left.IsUse = true;
right.IsUse = true;
lineList0[k].IsUse = true;
// The k loop keeps running, so further matching bottom lines add further rectangles
ret.Add(rec);
}
}
}
}
}
return ret;
}
/// <summary>
/// Debug helper: draws the given lines in red over a copy of <paramref name="source"/> and saves the result
/// to <paramref name="savepath"/>. The source image itself is not modified.
/// </summary>
/// <param name="source">Background image (the black / white page).</param>
/// <param name="lineList">Lines to draw, from (StartX, StartY) to (EndX, EndY).</param>
/// <param name="savepath">File the annotated image is saved to.</param>
private static void ShowLine(Bitmap source, List<LineCls> lineList, string savepath)
{
    // Bitmap, Graphics and Pen all wrap native GDI+ handles; release them as soon as the file is written.
    using (Bitmap bmp = new Bitmap(source.Width, source.Height))
    using (Graphics g = Graphics.FromImage(bmp))
    using (Pen pen = new Pen(Color.Red, 1f))
    {
        g.Clear(Color.White);
        g.DrawImage(source, 0, 0);
        foreach (LineCls t in lineList)
        {
            g.DrawLine(pen, t.StartX, t.StartY, t.EndX, t.EndY);
        }
        bmp.Save(savepath);
    }
}
/// <summary>
/// Scans the 0/1 pixel grid for straight runs of set pixels (value 1) longer than <paramref name="length"/>.
/// Rows are scanned first (horizontal lines, Drict = 0), then columns (vertical lines, Drict = 1).
/// A run that merely repeats the previously added line one pixel further on (thick borders) is suppressed.
/// </summary>
/// <param name="numList">0/1 pixel grid, indexed [row][column].</param>
/// <param name="length">Minimum run length; only runs strictly longer than this are returned.</param>
/// <returns>
/// The lines found. The start coordinate is the first set pixel of the run; the end coordinate is the
/// position just after the last set pixel.
/// </returns>
private static List<LineCls> GetAllLine(List<List<int>> numList, int length)
{
    List<LineCls> ret = new List<LineCls>();
    if (numList == null || numList.Count == 0)
    {
        return ret;
    }

    // Horizontal lines: walk every row from left to right.
    for (int y = 0; y < numList.Count; y++)
    {
        int rowWidth = numList[y].Count;
        int startX = -1;
        // x == rowWidth acts as a virtual background pixel so a run touching the right edge is closed too.
        for (int x = 0; x <= rowWidth; x++)
        {
            int value = x < rowWidth ? numList[y][x] : 0;
            if (value == 1)
            {
                if (startX < 0)
                {
                    startX = x;
                }
            }
            else if (value == 0 && startX >= 0)
            {
                // startX >= 0 (not > 0): a run may legitimately start in column 0.
                if (x - startX > length)
                {
                    AddLineUnlessAdjacent(ret, startX, y, x, y, 0);
                }
                // Always reset, also when the line was suppressed as noise; otherwise the same run
                // would be closed again on the next background pixel with a shifted end point.
                startX = -1;
            }
        }
    }

    // Vertical lines: walk every column from top to bottom.
    int columnCount = numList[0].Count;
    for (int x = 0; x < columnCount; x++)
    {
        int startY = -1;
        // y == numList.Count is the virtual background pixel below the bottom edge.
        for (int y = 0; y <= numList.Count; y++)
        {
            int value = y < numList.Count ? numList[y][x] : 0;
            if (value == 1)
            {
                if (startY < 0)
                {
                    startY = y;
                }
            }
            else if (value == 0 && startY >= 0)
            {
                if (y - startY > length)
                {
                    AddLineUnlessAdjacent(ret, x, startY, x, y, 1);
                }
                startY = -1;
            }
        }
    }
    return ret;
}

/// <summary>
/// Appends a line to <paramref name="lines"/> unless it is a near copy of the most recently added line:
/// same direction, less than 2 pixels further on, and both end points less than 2 pixels apart.
/// </summary>
private static void AddLineUnlessAdjacent(List<LineCls> lines, int startX, int startY, int endX, int endY, int direction)
{
    // Tolerance in pixels for treating two end points as the same.
    const int floatNum = 2;
    if (lines.Count > 0)
    {
        LineCls previous = lines[lines.Count - 1];
        if (previous.Drict == direction)
        {
            bool adjacent;
            if (direction == 0)
            {
                adjacent = (startY - previous.StartY < 2)
                    && Math.Abs(startX - previous.StartX) < floatNum
                    && Math.Abs(endX - previous.EndX) < floatNum;
            }
            else
            {
                adjacent = (startX - previous.StartX < 2)
                    && Math.Abs(startY - previous.StartY) < floatNum
                    && Math.Abs(endY - previous.EndY) < floatNum;
            }
            if (adjacent)
            {
                return;
            }
        }
    }

    LineCls line = new LineCls();
    line.StartX = startX;
    line.StartY = startY;
    line.EndX = endX;
    line.EndY = endY;
    line.Drict = direction;
    lines.Add(line);
}
/// <summary>
/// Dumps the pixel grid to a text file for inspection: one text line per grid row, the values of the row
/// written one after another without separators. An existing file is replaced.
/// </summary>
/// <param name="numList">Pixel grid, indexed [row][column].</param>
/// <param name="path">Full path of the text file to write.</param>
private static void OutPutValueToTxt(List<List<int>> numList, string path)
{
    StringBuilder sb = new StringBuilder();
    foreach (List<int> row in numList)
    {
        // Append straight into the builder; concatenating a per-row string is quadratic in the row width.
        foreach (int value in row)
        {
            sb.Append(value);
        }
        sb.AppendLine();
    }
    WriteTxt(path, sb.ToString(), true);
}
/// <summary>
/// Produces a pure black / white copy of <paramref name="source"/>. A pixel becomes black when
/// 255 minus its blue component is greater than 50, and white otherwise.
/// </summary>
/// <param name="source">Image to convert; it is only read.</param>
/// <returns>A new bitmap of the same size that contains only black and white pixels.</returns>
private static Bitmap ConvertToBipolar(Bitmap source)
{
    // Darkness threshold applied to (255 - blue).
    const int threshold = 50;
    Bitmap result = new Bitmap(source.Width, source.Height);
    int rows = result.Height;
    int columns = result.Width;
    for (int row = 0; row < rows; row++)
    {
        for (int column = 0; column < columns; column++)
        {
            int darkness = 255 - source.GetPixel(column, row).B;
            if (darkness > threshold)
            {
                result.SetPixel(column, row, Color.FromArgb(0, 0, 0));
            }
            else
            {
                result.SetPixel(column, row, Color.FromArgb(255, 255, 255));
            }
        }
    }
    return result;
}
/// <summary>
/// Converts a black / white image into a grid of 0/1 values, indexed [row][column]:
/// 0 where the blue component of the pixel is 255, 1 for every other pixel.
/// </summary>
/// <param name="source">Image to read; it is not modified.</param>
/// <returns>One inner list per image row, each holding one value per pixel of that row.</returns>
private static List<List<int>> GetNumberMap(Bitmap source)
{
    // Read the size straight from the source; allocating a second full-size bitmap just for its
    // dimensions wasted memory and leaked a GDI+ handle.
    int width = source.Width;
    int height = source.Height;
    List<List<int>> ret = new List<List<int>>(height);
    for (int y = 0; y < height; y++)
    {
        List<int> oneRow = new List<int>(width);
        for (int x = 0; x < width; x++)
        {
            // 255 - B is 0 only for a full blue component; everything darker counts as set.
            oneRow.Add(source.GetPixel(x, y).B == 255 ? 0 : 1);
        }
        ret.Add(oneRow);
    }
    return ret;
}
/// <summary>
/// Writes <paramref name="str"/> followed by a line break to the given file as UTF-8.
/// The file is created when it does not exist.
/// </summary>
/// <param name="path">Full path of the file.</param>
/// <param name="str">Text to write.</param>
/// <param name="isReplace">True (default) to replace the existing content; false to append to it.</param>
/// <returns>Always true; failures surface as the exception thrown by the file API.</returns>
public static bool WriteTxt(string path, string str, bool isReplace = true)
{
    // StreamWriter(path, append, encoding) creates a missing file, truncates it when append is false and
    // appends otherwise - the same three cases that were previously handled by hand. The using block
    // closes the file even when writing fails, and exceptions propagate with their original stack trace
    // (the former "throw ex;" reset it).
    using (StreamWriter writer = new StreamWriter(path, !isReplace, Encoding.UTF8))
    {
        writer.WriteLine(str);
    }
    return true;
}
/// <summary>
/// A straight line segment described by the coordinates of its two end points.
/// </summary>
private class LineCls
{
    /// <summary>X coordinate of the start point.</summary>
    public int StartX { get; set; }

    /// <summary>Y coordinate of the start point.</summary>
    public int StartY { get; set; }

    /// <summary>X coordinate of the end point.</summary>
    public int EndX { get; set; }

    /// <summary>Y coordinate of the end point.</summary>
    public int EndY { get; set; }

    /// <summary>Direction: 0 for a horizontal line, 1 for a vertical line.</summary>
    public int Drict { get; set; }

    /// <summary>Whether the line has already been used as an edge of a rectangle.</summary>
    public bool IsUse { get; set; }
}
}
}
花了整整大半天,喜欢就给检验点个赞吧,源自imedicallis