赞
踩
Premature end of JPEG file
我们在Caffe训练中,或者其他opencv读取JPG图像任务中,常常遇到异常提示:Premature end of JPEG file;
如下图中
这个异常通常是不会影响你正常训练节奏的,但是它却可能影响你的训练效果(当然出现少,看不出明显差距)
如果跟踪异常来源,会发现是opencv底层读取图像函数发出的异常,那会是opencv出了问题吗?个人认为不是!那么这到底是个什么异常呢?
答案是:JPEG图像格式破损;也就是你的训练样本JPEG图像格式出了毛病
你用图像浏览器打开,是正常的,如下图
同样图像用UltraEdit工具打开,查看其16进制格式的首尾,如图
文件头2个字节:0xff,0xd8(JPEG文件标识SOI)
文件尾2个字节:0xff,0xd9(JPEG文件结束标识EOI)
注意其右下角,存在灰色马赛克,其末尾字节0xd9,0x9c与标识不符,如下图:
有些图像破损更严重,图像浏览器都无法打开图像
写个小程序,check一遍数据集,将存在破损的JPEG图像挑出来
将这部分图像用opencv重写一遍,因为有些破损图像重写以后,在不影响训练的情况下,可以继续用作训练集;如上图破损图像,在重写后与原图像一样,但格式变正常,而其右下角的马赛克,对图像分类、人脸检测、人像分割等任务是没有影响的,则可以继续放回训练集。直接上代码:
#include "iostream" #include "opencv2/opencv.hpp" #include <string> #include <vector> #include <io.h> #include <fstream> #include <direct.h> #include <stdlib.h> using namespace std; using namespace cv; #pragma comment(lib,"opencv_world341.lib") //遍历文件夹下 所有文件夹 void ReadDirPath(string basePath, vector<string>& dirList) { //dirList.push_back(basePath); //文件句柄 long long hFile = 0; //文件信息 struct _finddata_t fileinfo; string p; if ((hFile = _findfirst(p.assign(basePath).append("\\*").c_str(), &fileinfo)) != -1) { do { //如果是目录,迭代之 //如果不是,加入列表 if (strcmp(fileinfo.name, ".") == 0 || strcmp(fileinfo.name, "..") == 0) { continue; } else { if ((fileinfo.attrib & 0x10) == _A_SUBDIR) { string dir = p.assign(basePath).append("\\").append(fileinfo.name); dirList.push_back(dir); ReadDirPath(dir, dirList); } } } while (_findnext(hFile, &fileinfo) == 0); _findclose(hFile); } } //编译文件夹下 所有文件(这里加了后缀过滤) void ReadImagePath(string basePath, vector<string>& imageList) { if (!imageList.empty()) { imageList.clear(); } vector<string> dirList; dirList.push_back(basePath); ReadDirPath(basePath, dirList); for (int i = 0; i < dirList.size(); i++) { long long hFile = 0; //文件信息 struct _finddata_t fileinfo; string p; if ((hFile = _findfirst(p.assign(dirList[i]).append("\\*.*").c_str(), &fileinfo)) != -1) { do { if (strcmp(fileinfo.name, ".") == 0 || strcmp(fileinfo.name, "..") == 0) { continue; } else { string name = fileinfo.name; if (name.size()<5) { continue; } name = name.substr(name.size() - 4, name.size()); if (name == ".jpg" || name == ".JPG" || name == ".png" || name == ".PNG" || name == "jpeg" || name == "JPEG") { string dir = p.assign(dirList[i]).append("\\").append(fileinfo.name); imageList.push_back(dir); } } } while (_findnext(hFile, &fileinfo) == 0); _findclose(hFile); } } } //检查jpeg bool CheckJpeg(string file) { if (file.empty()) { return false; } ifstream in(file.c_str(), ios::in | ios::binary); if (!in.is_open()) { cout << "Error opening file!" 
<< endl; return false; } int start; in.read((char*)&start, 4); short int lstart = start << 16 >> 16; //cout << hex << lstart << " "; in.seekg(-4, ios::end); int end; in.read((char*)&end, 4); short int lend = end >> 16; //cout << hex << lend << endl; in.close(); if ((lstart != -9985) || (lend != -9729)) //0xd8ff 0xd9ff { return true; } return false; } int main(int argc, char* argv[]) { if (argc < 3) { cout << "Please Use: [image_path] [isdemo]" << endl; system("pause"); return -1; } else if (argc >3) { cout << "Parameters too much" << endl; system("pause"); return -1; } string image_path = argv[1]; vector<string> image_list; ReadImagePath(image_path, image_list); if (image_list.size()<1) { cout << image_path << ": This path has no jpeg image!" << endl; system("pause"); return -1; } int num = image_list.size(); cout << "Check image plan: " << endl; for (size_t i = 0; i < num; i++) { printf("%d/%d\r", i, num), fflush(stdout); string save_dir = image_path; string name = image_list[i]; name = name.substr(name.size() - 4, name.size()); bool isJpg = false; if (name == ".jpg" || name == ".JPG" || name == "jpeg" || name == "JPEG") { isJpg = CheckJpeg(image_list[i]); } if (isJpg) { save_dir += "_false"; } Mat img = imread(image_list[i]); if (atoi(argv[2]) && !img.empty()) { imshow("img", img); cvWaitKey(0); } if (isJpg) //格式破损 { if (_access(save_dir.c_str(), 6) == -1) { _mkdir(save_dir.c_str()); } if (!img.empty()) { string image_name = image_list[i]; image_name = image_name.substr(image_name.rfind("\\")); imwrite(save_dir + image_name, img); } remove(image_list[i].c_str()); } } cout << "finished!" << endl; //system("pause"); return 0; }
编译为exe,可用run.bat启动,
命令行说明
最终会将存在问题的图像重写一遍,放在与你的图像路径文件夹xxx同级位置下新建的xxx_false文件夹下。分类的话也会新建文件夹,当然读者也可根据自己喜好更改。
下载链接:链接:https://pan.baidu.com/s/1HtiKHyZPsbh2T2-tFytCDQ 提取码:95oc
复制这段内容后打开百度网盘手机App,操作更方便哦
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。