赞
踩
在数据采集中,尤其是采集时效性敏感的数据(比如微博热搜这类“过了这个村就没有这个店”的数据)时,为了证明当时数据采集的准确性与完整性,最直接有效的办法就是保留当时采集的网页页面。
本文主要交流的问题是:使用Appium模拟操作进行截屏、且页面需要滑动才能截全时,如何得到一张无缝的完整长图。
基本思路:图片截取->计算截取后图片的差异值->取最小差异值的截取位置进行截取->对截取图片进行拼接。因为微博热搜有所谓的“页眉”,所以会有一个去“页眉”的截取步骤。
原始图片
——————————————————图片分隔符————————————————————
——————————————————图片分隔符————————————————————
截取图片
——————————————————图片分隔符————————————————————
——————————————————图片分隔符————————————————————
拼接图片
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv</artifactId>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.3.3</version>
</dependency>
类Pic
public class Pic { public static final Logger LOGGER = LoggerFactory.getLogger(Pic.class); public void cut(InputStream input, OutputStream out, int x, int y, int width, int height) throws Exception { ImageInputStream imageStream = null; try { Iterator readers = ImageIO.getImageReadersByFormatName("png"); ImageReader reader = (ImageReader) readers.next(); imageStream = ImageIO.createImageInputStream(input); reader.setInput(imageStream, true); ImageReadParam param = reader.getDefaultReadParam(); Rectangle rect = new Rectangle(x, y, width, height); param.setSourceRegion(rect); BufferedImage bi = reader.read(0, param); ImageIO.write(bi, "png", out); } finally { if (imageStream != null) { imageStream.close(); } } } public void merge(List<String> picList, String pic2, String type) throws Exception { if (picList == null || picList.size() <= 0) { throw new Exception("无待合并图片!"); } int dstHeight = 0; int dstWidth = 0; File[] file = new File[picList.size()]; BufferedImage[] images = new BufferedImage[picList.size()]; int[][] ImageArrays = new int[picList.size()][]; for (int i = 0; i < picList.size(); i++) { file[i] = new File(picList.get(i)); images[i] = ImageIO.read(file[i]); int width = images[i].getWidth(); int height = images[i].getHeight(); ImageArrays[i] = new int[width * height]; ImageArrays[i] = images[i].getRGB(0, 0, width, height, ImageArrays[i], 0, width); dstWidth = dstWidth > width ? 
dstWidth : width; dstHeight += height; } if (dstHeight <= 0) { throw new Exception("图片合并高度为0!"); } BufferedImage imageNew = new BufferedImage(dstWidth, dstHeight, BufferedImage.TYPE_INT_RGB); int height_i = 0; for (int i = 0; i < images.length; i++) { int width = images[i].getWidth(); int height = images[i].getHeight(); imageNew.setRGB(0, height_i, width, height, ImageArrays[i], 0, width); height_i += height; } ImageIO.write(imageNew, type, new File(pic2)); } public double compare(String targetImageUrl, String baseImageUrl) throws Exception { opencv_core.Mat targetImage = imread(targetImageUrl); opencv_core.Mat baseImage = imread(baseImageUrl); if (targetImage.size().width() == baseImage.size().width()) { if (targetImage.size().height() != baseImage.size().height()) { if (targetImage.size().height() > baseImage.size().height()) { targetImage = dealLong(targetImage.clone(), baseImage.clone()); } else { baseImage = dealLong(baseImage.clone(), targetImage.clone()); } } opencv_core.Mat imageDiff = compare(targetImage, baseImage); double nonZeroPercent = 100 * (double) countNonZero(imageDiff) / (imageDiff.size().height() * imageDiff.size().width()); imageDiff.release(); baseImage.release(); targetImage.release(); return nonZeroPercent; } else { throw new Exception("图片比对时,图片宽度不一致!"); } } public int interceptBarHeight(opencv_core.Mat longImage, opencv_core.Mat shortImage) { int imageSearchMaxHeight = 240; opencv_core.Mat subImageLong = new opencv_core.Mat(longImage, new opencv_core.Rect(0, longImage.size().height() - imageSearchMaxHeight, longImage.size().width(), imageSearchMaxHeight)); opencv_core.Mat subImageShort = new opencv_core.Mat(shortImage, new opencv_core.Rect(0, shortImage.size().height() - imageSearchMaxHeight, shortImage.size().width(), imageSearchMaxHeight)); opencv_core.Mat imageDiff = compare(subImageLong, subImageShort); for (int row = imageDiff.size().height() - 1; row > -1; row--) { for (int col = 0; col < imageDiff.size().width(); col++) { BytePointer 
bytePointer = imageDiff.ptr(row, col); if (bytePointer.get(0) != 0) { imageDiff.release(); return imageSearchMaxHeight - row; } } } return imageSearchMaxHeight; } public opencv_core.Mat dealLong(opencv_core.Mat longImage, opencv_core.Mat shortImage) { int barHeight = interceptBarHeight(longImage, shortImage); opencv_core.Mat dealedLongImage = new opencv_core.Mat(longImage, new opencv_core.Rect(0, 0, longImage.size().width(), shortImage.size().height() - barHeight)); opencv_core.Mat imageBar = new opencv_core.Mat(longImage, new opencv_core.Rect(0, longImage.size().height() - barHeight, longImage.size().width(), barHeight)); opencv_core.Mat dealedLongImageNew = dealedLongImage.clone(); vconcat(dealedLongImage, imageBar, dealedLongImageNew); imageBar.release(); dealedLongImage.release(); return dealedLongImageNew; } public opencv_core.Mat compare(opencv_core.Mat targetImage, opencv_core.Mat baseImage) { opencv_core.Mat targetImageClone = targetImage.clone(); opencv_core.Mat baseImageColne = baseImage.clone(); opencv_core.Mat imgDiff1 = targetImage.clone(); opencv_core.Mat imgDiff = targetImage.clone(); // 将图片转成灰度图 cvtColor(targetImage, targetImageClone, COLOR_BGR2GRAY); cvtColor(baseImage, baseImageColne, COLOR_BGR2GRAY); // 两个矩阵相减,获得差异图 subtract(targetImageClone, baseImageColne, imgDiff1); subtract(baseImageColne, targetImageClone, imgDiff); // 按比重进行叠加 addWeighted(imgDiff, 1, imgDiff1, 1, 0, imgDiff); // 图片二值化,大于24的为1,小于24的为0 threshold(imgDiff, imgDiff, 24, 255, THRESH_BINARY); erode(imgDiff, imgDiff, new opencv_core.Mat()); dilate(imgDiff, imgDiff, new opencv_core.Mat()); return imgDiff; } }
类WeiboPic
/**
 * Stitches a sequence of scrolling screenshots into one long image.
 *
 * <p>For each consecutive pair of screenshots, the y offset in the earlier one at which
 * its remaining content best matches the later one (with its top header strip removed)
 * is searched for. The earlier screenshot is cropped at that offset, and all pieces are
 * then merged vertically.
 */
public class WeiboPic extends Pic {

    public static final Logger LOGGER = LoggerFactory.getLogger(WeiboPic.class);

    /** Screenshot width in pixels used for every crop. */
    private static final int SCREEN_WIDTH = 1080;

    /** Screenshot height in pixels used for every crop. */
    private static final int SCREEN_HEIGHT = 1920;

    /** Height of the top strip removed from every screenshot except the first. */
    private static final int HEADER_HEIGHT = 360;

    /**
     * Distance from the bottom edge at which the overlap search starts.
     * NOTE(review): presumably chosen to stay clear of the bottom of the screen - confirm.
     */
    private static final int SEARCH_BOTTOM_MARGIN = 400;

    public String picSnapshotPath = PropKit.get("path.pic.snapshot");

    /**
     * Stitches the given screenshots, in order, into a single PNG.
     *
     * <p>Temporary files named {@code *_tmp.png} and {@code *_snapshot.png} are written
     * next to the inputs, derived by replacing {@code ".png"} in each input path.
     * Processing stops early when the bottom strips of two consecutive later screenshots
     * are identical.
     *
     * @param picList paths of the screenshots, top of the page first
     * @param pic2    path of the stitched output image
     * @throws Exception if the list is null/empty or any crop, comparison or merge fails
     */
    public void process(List<String> picList, String pic2) throws Exception {
        if (picList == null || picList.size() <= 0) {
            throw new Exception("无待处理图片!");
        }
        if (picList.size() == 1) {
            merge(picList, pic2, "png");
            return;
        }

        List<String> picMergeList = new ArrayList<>();
        boolean isNext = true;
        int lastIndex = picList.size() - 1;
        for (int i = 1; i <= lastIndex; i++) {
            String picOne = picList.get(i - 1);
            String picTwo = picList.get(i);
            String picOneTmp = picOne.replace(".png", "_tmp.png");
            String picOneSnapshot = picOne.replace(".png", "_snapshot.png");
            String picTwoTmp = picTwo.replace(".png", "_tmp.png");

            if (i < lastIndex) {
                // Identical bottom strips on the next two screenshots end the stitching.
                String picThree = picList.get(i + 1);
                String picThreeTmp = picThree.replace(".png", "_tmp.png");
                cutFile(picTwo, picTwoTmp, 0, SCREEN_HEIGHT - HEADER_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
                cutFile(picThree, picThreeTmp, 0, SCREEN_HEIGHT - HEADER_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
                if (compare(picTwoTmp, picThreeTmp) <= 0.0) {
                    isNext = false;
                }
            }

            // Later screenshot without its header: the reference for the overlap search.
            cutFile(picTwo, picTwoTmp, 0, HEADER_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
            int yMin = findBestOverlapY(picOne, picOneTmp, picTwoTmp);

            if (i == 1) {
                // The first screenshot keeps its header.
                cutFile(picOne, picOneSnapshot, 0, 0, SCREEN_WIDTH, yMin);
                picMergeList.add(picOneSnapshot);
            } else if (yMin > HEADER_HEIGHT) {
                cutFile(picOne, picOneSnapshot, 0, HEADER_HEIGHT, SCREEN_WIDTH, yMin - HEADER_HEIGHT);
                picMergeList.add(picOneSnapshot);
            }
            // else: the overlap starts right below the header, so this screenshot adds no
            // new rows; a zero-height crop would be rejected by ImageReadParam.

            if (!isNext || i == lastIndex) {
                picMergeList.add(picTwoTmp);
                break;
            }
        }
        merge(picMergeList, pic2, "png");
    }

    /**
     * Scans y from {@code SCREEN_HEIGHT - SEARCH_BOTTOM_MARGIN} down to
     * {@code HEADER_HEIGHT} for the offset at which {@code picOne}, cropped from y
     * downwards, differs least from {@code picTwoTmp}.
     *
     * @return the first y with zero difference, otherwise the y with the smallest difference
     */
    private int findBestOverlapY(String picOne, String picOneTmp, String picTwoTmp) throws Exception {
        double nonZeroPercentMin = 0.0;
        int yMin = 0;
        for (int y = SCREEN_HEIGHT - SEARCH_BOTTOM_MARGIN; y >= HEADER_HEIGHT; y--) {
            LOGGER.info("当前处理图片:{},Y轴:{}", picOne, y);
            cutFile(picOne, picOneTmp, 0, y, SCREEN_WIDTH, SCREEN_HEIGHT);
            double nonZeroPercent = compare(picOneTmp, picTwoTmp);
            if (nonZeroPercent <= 0.0) {
                return y;
            }
            if (nonZeroPercentMin <= 0.0 || nonZeroPercentMin > nonZeroPercent) {
                nonZeroPercentMin = nonZeroPercent;
                yMin = y;
            }
        }
        return yMin;
    }

    /** Crops a region from one PNG file into another, closing both file streams afterwards. */
    private void cutFile(String srcPath, String dstPath, int x, int y, int width, int height) throws Exception {
        try (FileInputStream in = new FileInputStream(srcPath);
             FileOutputStream out = new FileOutputStream(dstPath)) {
            cut(in, out, x, y, width, height);
        }
    }

    /**
     * Sample run: initialises properties, then stitches three hard-coded screenshots into
     * a uniquely named PNG under {@code picSnapshotPath}.
     */
    public static void main(String[] args) throws Exception {
        InitService initService = new InitService();
        initService.initProp();
        WeiboPic weiboPic = new WeiboPic();
        List<String> hotwordScrFilePathList = new ArrayList<>();
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505521_184302e4-7ffb-42e9-8131-a60a91cf9586.png");
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505526_6795fd01-a5af-4536-81ce-4ab96905d163.png");
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505531_6c00ecc8-9c01-4b53-a9e2-00f3bc443964.png");
        String hotwordScrFilePath2 = weiboPic.picSnapshotPath + File.separator
                + (int) (System.currentTimeMillis() / 1000) + "_" + UUID.randomUUID().toString() + ".png";
        weiboPic.process(hotwordScrFilePathList, hotwordScrFilePath2);
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。