赞
踩
网上查了很多资料, 汇总了几个不错的代码, 顺便做个笔记
简单方便, 后续去水印也好处理
# -*- coding:utf-8 -*-
"""Stamp a simple text watermark onto every image found in a directory."""
import os

from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw


def watermark_Image(img_path, output_path, text="CSDN", font_path='arial.ttf',
                    font_size=33, position=(50, 50), fill=(255, 255, 255)):
    """Draw a text watermark on one image and save the result.

    :param img_path: path of the image to read
    :param output_path: path the watermarked image is written to
    :param text: watermark text
    :param font_path: TrueType font file handed to ``ImageFont.truetype``
    :param font_size: font size handed to ``ImageFont.truetype``
    :param position: (x, y) coordinate handed to ``ImageDraw.text``
    :param fill: text colour handed to ``ImageDraw.text``
    :return: None
    """
    # Load the font first so a missing font fails before the image is opened.
    font = ImageFont.truetype(font_path, font_size)

    # The context manager closes the underlying file even when drawing or
    # saving raises.
    with Image.open(img_path) as img:
        draw = ImageDraw.Draw(img)

        # Image-watermark alternative (kept for reference, disabled):
        # watermark = Image.open('1.png')
        # wm_width, wm_height = watermark.size
        # wm_width = int(wm_width * 1.5)
        # wm_height = int(wm_height * 1.5)
        # watermark = watermark.resize((wm_width, wm_height))
        # img.paste(watermark, (5, 5), watermark)

        # Text watermark.
        draw.text(position, text, font=font, fill=fill)
        img.save(output_path)


def run(in_path):
    """Watermark every entry of ``in_path`` into ``./img_watermark``.

    Entries that cannot be processed (sub-directories, non-image files, ...)
    are reported and skipped so that one bad entry does not stop the batch.

    :param in_path: directory containing the source images
    :return: None
    """
    out_path = './img_watermark'  # directory for the watermarked images
    os.makedirs(out_path, exist_ok=True)

    for file in os.listdir(in_path):
        # os.path.join works for relative and absolute input directories,
        # unlike string formatting plus str.replace on the combined path.
        img_path = os.path.join(in_path, file)
        output_path = os.path.join(out_path, file)
        try:
            watermark_Image(img_path, output_path)
        except Exception as e:
            print(img_path, e)
            continue
        print(file, '完成!')


if __name__ == '__main__':
    run('./img_data')
比较复杂, 而且这种水印后续很难清除干净
"""Tile a rotated, semi-transparent text watermark over a whole image."""
import os
import sys
import argparse

from PIL import Image, ImageDraw, ImageFont, ImageEnhance

# Font used when the caller does not supply one.
DEFAULT_FONT = 'arial.ttf'
# Directory that combine() writes its result into.
OUTPUT_DIR = './img_no_watermark/'


def read_origin_photo(photo_path, photo_angle=0):
    """Load an image as RGBA, rotate it and report its size.

    :param photo_path: path of the image
    :param photo_angle: rotation angle passed to ``Image.rotate`` (expand=True)
    :return: ``(image, h, w)`` where ``h, w`` are unpacked from ``image.size``.
        NOTE: PIL's ``size`` is ``(width, height)``, so ``h`` actually holds
        the width and ``w`` the height. The names are kept because
        ``make_text_picture`` consumes them in the same order.
    """
    # convert() returns a fully loaded copy, so the source file can be closed.
    with Image.open(photo_path) as src:
        origin_photo = src.convert('RGBA')
    origin_photo = origin_photo.rotate(photo_angle, expand=True)
    h, w = origin_photo.size
    return origin_photo, h, w


# def get_color(text_color):
#     r = int(text_color[1:3], base=16)
#     g = int(text_color[3:5], base=16)
#     b = int(text_color[5:7], base=16)
#     return r, g, b


def make_text_picture(h, w, text, font_path, font_size=40, angle=-45, color=(0, 0, 0)):
    """Build the watermark layer: a white canvas tiled with rotated text.

    :param h: first element of the source image's ``size``
    :param w: second element of the source image's ``size``
    :param text: watermark text; must not be empty
    :param font_path: TrueType font file; a falsy value selects ``DEFAULT_FONT``
    :param font_size: font size
    :param angle: rotation applied to the tiled canvas
    :param color: text colour passed to ``ImageDraw.multiline_text``
    :return: the watermark layer, ``(2 * h, 2 * w)`` in PIL size terms
    :raises ValueError: if ``text`` is empty
    """
    if not text:
        # An empty text would make the horizontal tiling step zero.
        raise ValueError('text must not be empty')

    # Draw on a canvas four times the image size so that the central crop
    # taken after rotation is still covered with text.
    text_pic = Image.new('RGBA', (4 * h, 4 * w), (255, 255, 255, 255))
    fnt = ImageFont.truetype(font_path or DEFAULT_FONT, size=font_size)
    text_d = ImageDraw.Draw(text_pic)

    # a and b control column and row spacing as multiples of the font size.
    a, b = 2, 4
    x_step = a * font_size * len(text)
    y_step = b * font_size
    for x in range(10, text_pic.size[0] - 10, x_step):
        for y in range(10, text_pic.size[1] - 10, y_step):
            text_d.multiline_text((x, y), text, fill=color, font=fnt)

    # Rotate the tiled text, then keep only the central region.
    text_pic = text_pic.rotate(angle)
    text_pic = text_pic.crop((h, w, 3 * h, 3 * w))
    # text_pic.show()
    return text_pic


def combine(origin_photo, text_pic, alpha=0.2, out_name='out.jpg'):
    """Blend the watermark layer into the image, then save and display it.

    :param origin_photo: source image
    :param text_pic: watermark layer; it is resized to the source image size
    :param alpha: watermark opacity used for ``Image.blend``; must be below 1
    :param out_name: output file name inside ``./img_no_watermark/``
    :return: None
    :raises ValueError: if ``alpha`` is 1 or larger
    """
    if alpha >= 1:
        # The contrast factor below is 1 / (1 - alpha).
        raise ValueError('alpha must be smaller than 1')

    # Merge the watermark layer with the source image.
    text_pic = text_pic.resize(origin_photo.size)
    out = Image.blend(origin_photo, text_pic, alpha)
    out = out.convert('RGB')

    # Raise the contrast after blending with the white layer.
    enhance = ImageEnhance.Contrast(out)
    out = enhance.enhance(1.0 / (1 - alpha))

    # Make sure the target directory exists before saving.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_path = os.path.join('./img_no_watermark/', out_name)
    out.save(out_path)

    out.show()


def main():
    """Parse the command line and watermark a single image."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', dest='path', default='./img_data/8d9337c7-dcf9-11ee-b5dc-508140236042.jpg',
                        help='图片路径,如:1.jpg或./images/1.jpg')
    parser.add_argument('-t', dest='text', default='Python', help="要添加的水印内容")
    parser.add_argument('--photo_angle', dest='photo_angle', type=int, default=0,
                        help='原图片旋转角度,默认为0,不进行旋转')
    parser.add_argument('--new_image_name', dest='new_image_name', default=None,
                        help='输出图片的名称, 默认为"原图片名_with_watermark.jpg", 图片保存在img_no_watermark目录下')
    parser.add_argument('--font_path', dest='font_path', default=DEFAULT_FONT,
                        help=r'要使用的字体路径,如 STSONG.TTF,windows可在C:\Windows\Fonts查找字体')
    parser.add_argument('--text_angle', dest='text_angle', type=int, default=-45,
                        help='水印的旋转角度,0为水平,-90位从上向下垂直, 90为从下向上垂直,默认-45')
    parser.add_argument('--text_color', dest='text_color', default='#000000',
                        help="水印颜色,默认#000000(黑色)")
    parser.add_argument('--text_size', dest='text_size', type=int, default=40,
                        help='水印字体的大小, 默认40')
    parser.add_argument('--text_alpha', dest='text_alpha', type=float, default=0.2,
                        help='水印的不透明度,建议0.2~0.3,默认0.2')
    # args is an argparse.Namespace: the options are available as attributes.
    args = parser.parse_args()

    photo_path = args.path
    print(photo_path)
    text = args.text
    if not photo_path or not text:
        print('必须指定图片路径和水印文字')
        sys.exit(-1)

    origin_photo, h, w = read_origin_photo(photo_path, args.photo_angle)
    text_pic = make_text_picture(h, w, text, args.font_path, font_size=args.text_size,
                                 angle=args.text_angle, color=args.text_color)

    new_image_name = args.new_image_name
    if new_image_name is None:
        # splitext keeps dots that are part of the base name.
        photo_name = os.path.splitext(os.path.basename(photo_path))[0]
        new_image_name = photo_name + '_with_watermark.jpg'
    combine(origin_photo, text_pic, alpha=args.text_alpha, out_name=new_image_name)


if __name__ == '__main__':
    main()
找了好多去水印代码,只有这个效果不错,但是代码需要水印模板来确定水印位置, 当然如果水印少且位置固定可以不用(例如1.1), 2.2就不展示了, 基本没变化
原理就是通过模板找到相同形状图案位置,然后根据旁边像素点进行补充
水印模板
1.1 图片去水印效果
# coding=utf-8
"""Remove a known watermark from images with template-guided inpainting."""
import os

import cv2
import numpy as np

# Structuring element for dilation: a 5x5 plus sign.
_DILATE_KERNEL = np.array([[0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0],
                           [1, 1, 1, 1, 1],
                           [0, 0, 1, 0, 0],
                           [0, 0, 1, 0, 0]], dtype=np.uint8)


class WatermarkRemover(object):
    """Remove a watermark from images using a watermark template."""

    def __init__(self, verbose=True, fixed_position=(50, 55)):
        """
        :param verbose: stored on the instance; not read by this class
        :param fixed_position: (x, y) of the watermark's top-left corner when
            the watermark always sits at the same place. Pass ``None`` to
            locate the watermark in each image by template matching instead.
        """
        self.verbose = verbose
        self.fixed_position = fixed_position
        self.watermark_template_gray_img = None
        self.watermark_template_mask_img = None
        self.watermark_template_h = 0
        self.watermark_template_w = 0

    def load_watermark_template(self, watermark_template_filename):
        """Load the watermark template once so many images can be processed.

        :param watermark_template_filename: path of the template image
        :return: None
        """
        self.generate_template_gray_and_mask(watermark_template_filename)

    def dilate(self, img):
        """Dilate ``img`` with the module's plus-shaped kernel.

        :param img: image to dilate
        :return: the dilated image
        """
        return cv2.dilate(img, _DILATE_KERNEL)

    def generate_template_gray_and_mask(self, watermark_template_filename):
        """Derive the grayscale template and the mask from the template file.

        The results are also stored on the instance together with the
        template's height and width.

        :param watermark_template_filename: path of the template image
        :return: ``(gray, mask)``; ``mask`` has three channels
        :raises ValueError: if the template image cannot be read
        """
        img = cv2.imread(watermark_template_filename)
        if img is None:
            # cv2.imread reports failure by returning None.
            raise ValueError('cannot read watermark template: {}'.format(watermark_template_filename))

        # Grayscale image, then a binary mask from it.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO + cv2.THRESH_OTSU)
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

        # Grow the mask by one ring so no unrepaired edge is left behind.
        mask = self.dilate(mask)

        # The mask is stored with three channels.
        mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

        self.watermark_template_gray_img = gray
        self.watermark_template_mask_img = mask
        self.watermark_template_h = img.shape[0]
        self.watermark_template_w = img.shape[1]

        # cv2.imwrite('watermark-template-gray.jpg', gray)
        # cv2.imwrite('watermark-template-mask.jpg', mask)
        return gray, mask

    def find_watermark(self, filename):
        """Locate the watermark in the image stored at ``filename``.

        :param filename: path of the image to search
        :return: x1, y1, x2, y2
        :raises ValueError: if the image cannot be read
        """
        # Flag 0 loads the image as grayscale.
        gray_img = cv2.imread(filename, 0)
        if gray_img is None:
            raise ValueError('cannot read image: {}'.format(filename))
        return self.find_watermark_from_gray(gray_img, self.watermark_template_gray_img)

    def find_watermark_from_gray(self, gray_img, watermark_template_gray_img):
        """Locate the watermark in a grayscale image by template matching.

        :param gray_img: grayscale version of the image to search
        :param watermark_template_gray_img: grayscale watermark template
        :return: x1, y1, x2, y2 (x2/y2 use the loaded template's width/height)
        """
        method = cv2.TM_CCOEFF
        res = cv2.matchTemplate(gray_img, watermark_template_gray_img, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

        # For TM_SQDIFF / TM_SQDIFF_NORMED the best match is the minimum.
        if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
            x, y = min_loc
        else:
            x, y = max_loc
        return x, y, x + self.watermark_template_w, y + self.watermark_template_h

    def remove_watermark_raw(self, img, watermark_template_gray_img, watermark_template_mask_img):
        """Inpaint the watermark region of ``img``.

        :param img: image (three channels) to clean
        :param watermark_template_gray_img: grayscale watermark template
        :param watermark_template_mask_img: template mask; currently unused,
            see the review note in the body
        :return: the inpainted image
        """
        if self.fixed_position is None:
            # Position varies from image to image: search for the template.
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            x1, y1, _, _ = self.find_watermark_from_gray(img_gray, watermark_template_gray_img)
        else:
            x1, y1 = self.fixed_position

        # NOTE(review): the grayscale template is used as the inpainting mask
        # here, not the binary mask passed in as watermark_template_mask_img,
        # so every non-zero template pixel gets repainted. This mirrors the
        # original behaviour; confirm whether the dilated binary mask was
        # intended.
        region_mask = cv2.cvtColor(watermark_template_gray_img, cv2.COLOR_GRAY2BGR)

        # Clip the pasted region to the image so a watermark near the border
        # does not cause a shape mismatch.
        img_h, img_w = img.shape[:2]
        tpl_h, tpl_w = region_mask.shape[:2]
        y_end = min(y1 + tpl_h, img_h)
        x_end = min(x1 + tpl_w, img_w)

        mask = np.zeros(img.shape, np.uint8)
        if y_end > y1 and x_end > x1:
            mask[y1:y_end, x1:x_end] = region_mask[:y_end - y1, :x_end - x1]
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        # Repair the masked pixels with the TELEA algorithm.
        dst = cv2.inpaint(img, mask, 5, cv2.INPAINT_TELEA)
        # cv2.imwrite('dst.jpg', dst)
        return dst

    def remove_watermark(self, filename, output_filename=None):
        """Remove the watermark from an image file.

        :param filename: path of the image to clean
        :param output_filename: if given, the cleaned image is written here
        :return: the cleaned image
        :raises ValueError: if the image cannot be read
        """
        img = cv2.imread(filename)
        if img is None:
            raise ValueError('cannot read image: {}'.format(filename))

        dst = self.remove_watermark_raw(img,
                                        self.watermark_template_gray_img,
                                        self.watermark_template_mask_img)

        if output_filename is not None:
            cv2.imwrite(output_filename, dst)
        return dst


def run(in_path):
    """Clean every entry of ``in_path`` into ``./img_no_watermark``.

    Failures are printed and the batch continues with the next entry.

    :param in_path: directory containing the watermarked images
    :return: None
    """
    # Watermark template image.
    watermark_template_filename = './watermark.png'
    remover = WatermarkRemover()
    remover.load_watermark_template(watermark_template_filename)

    out_path = './img_no_watermark'
    os.makedirs(out_path, exist_ok=True)

    for file in os.listdir(in_path):
        in_img_path = os.path.join(in_path, file)
        out_img_path = os.path.join(out_path, file)
        try:
            remover.remove_watermark(in_img_path, out_img_path)
            print('{} 完成!'.format(file))
        except Exception as e:
            print(e, in_img_path)


if __name__ == '__main__':
    run('./img_watermark')
原理就是两个pdf合并到一块
代码会生成一个水印.pdf
执行代码效果
"""Overlay a generated text-watermark page onto every page of each PDF."""
import os

from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.units import cm
from reportlab.pdfgen import canvas


def create_watermark(content):
    """Create ``watermark.pdf``: one page tiled with semi-transparent text.

    :param content: watermark text
    :return: file name of the generated watermark PDF
    """
    file_name = "watermark.pdf"
    # The page is 30cm x 30cm.
    c = canvas.Canvas(file_name, pagesize=(30*cm, 30*cm))
    # Move the origin (the coordinate system starts at the bottom-left).
    c.translate(10*cm, 5*cm)

    c.setFont("Helvetica", 30)
    # Stroke colour.
    c.setStrokeColorRGB(0, 1, 0)
    # Rotate by 30 degrees; this rotates the coordinate system itself.
    c.rotate(30)
    # Fill colour: red at 10% opacity. Colour components are given on a
    # 0..1 scale; the alpha set here stays active for all text drawn below.
    c.setFillColorRGB(1, 0, 0, 0.1)

    # Tile the text; the coordinates are in the rotated coordinate system.
    for i in range(5):
        for j in range(10):
            a = 10 * (i - 1)
            b = 5 * (j - 2)
            c.drawString(a*cm, b*cm, content)

    # Finish the page and write the PDF file.
    c.save()
    return file_name


def add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out):
    """Merge the first page of the watermark PDF onto every input page.

    :param pdf_file_in: path of the PDF to watermark
    :param pdf_file_mark: path of the watermark PDF (its first page is used)
    :param pdf_file_out: path the watermarked PDF is written to
    :return: None
    """
    pdf_output = PdfWriter()

    # Keep both sources open until the output has been written, then close
    # all handles even if something fails along the way.
    with open(pdf_file_in, 'rb') as input_stream, \
            open(pdf_file_mark, 'rb') as mark_stream:
        pdf_input = PdfReader(input_stream, strict=False)
        watermark_page = PdfReader(mark_stream, strict=False).pages[0]

        # Stamp every page.
        for page in pdf_input.pages:
            page.merge_page(watermark_page)
            page.compress_content_streams()  # compress the page content
            pdf_output.add_page(page)

        with open(pdf_file_out, 'wb') as output_stream:
            pdf_output.write(output_stream)


def run(path):
    """Watermark every entry of ``path`` into ``./pdf_watermark``.

    Entries that cannot be processed are reported and skipped so that one
    bad file does not stop the batch.

    :param path: directory containing the source PDFs
    :return: None
    """
    # Generate the watermark page once for the whole batch.
    pdf_file_mark = create_watermark('CSDN')

    out_path = './pdf_watermark'
    os.makedirs(out_path, exist_ok=True)

    for file in os.listdir(path):
        pdf_file_in = os.path.join(path, file)
        pdf_file_out = os.path.join(out_path, file)
        try:
            add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out)
        except Exception as e:
            print(pdf_file_in, e)
            continue
        print(pdf_file_out, '完成!')


if __name__ == '__main__':
    run('./pdf_data')
原理就是把pdf转成一张张图片, 因为水印一般都是浅色且透明,所以根据水印色差对图片整体色差进行调整, 从而去除水印
水印的 RGB 值越高, 颜色越浅(看起来越透明), 所以阈值不要写得太死, 要留一点余量: 例如水印的 RGB 是 230, 阈值就写成 210
例如:
效果图:
"""Remove light watermarks from PDFs by rasterising, whitening, rebuilding."""
import os
import shutil
import cv2
import numpy as np
import fitz
from fpdf import FPDF
from PIL import Image
import tempfile

# Pixel size (width, height) of an A4 sheet at 300 DPI.
A4_SIZE_PX_300DPI = (2480, 3508)


def remove_watermark(image_path, threshold=210):
    """Whiten every pixel whose three channels are all >= ``threshold``.

    The image file is overwritten in place.

    :param image_path: path of the image to clean
    :param threshold: lower bound applied to all three channels. Choose it a
        bit below the watermark colour (e.g. 210 for a watermark around 230).
    :return: None
    :raises ValueError: if the image cannot be read
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None.
        raise ValueError('cannot read image: {}'.format(image_path))

    lower = np.array([threshold, threshold, threshold])
    upper = np.array([255, 255, 255])

    # inRange yields 255 where all channels lie within [lower, upper].
    mask = cv2.inRange(img, lower, upper)
    img[mask == 255] = [255, 255, 255]

    cv2.imwrite(image_path, img)


def pdf_to_images(pdf_path, output_folder):
    """Render each PDF page to a PNG at 300 DPI and strip its watermark.

    :param pdf_path: path of the PDF to rasterise
    :param output_folder: existing directory receiving ``page_<n>.png`` files
    :return: list of the written image paths, in page order
    """
    images = []
    zoom = 300 / 72  # PDF user space is 72 units per inch
    # The context manager closes the document when rendering is done.
    with fitz.open(pdf_path) as doc:
        for page_num in range(doc.page_count):
            page = doc[page_num]
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            image_path = os.path.join(output_folder, f"page_{page_num + 1}.png")
            pix.save(image_path)
            images.append(image_path)
            # Clean the rendered page image in place.
            remove_watermark(image_path)
    return images


def images_to_pdf(image_paths, output_path):
    """Assemble page images into a PDF, one image per A4 page.

    :param image_paths: page image paths, in page order
    :param output_path: path of the PDF to write
    :return: None
    """
    pdf_writer = FPDF(unit='pt', format='A4')

    # A4 size in pixels at 300 DPI, derived from its millimetre size.
    a4_size_mm = (210, 297)
    a4_size_px = (a4_size_mm[0] * 300 / 25.4, a4_size_mm[1] * 300 / 25.4)

    temp_paths = []
    try:
        for image_path in image_paths:
            with Image.open(image_path) as img:
                width, height = img.size
                # Scale to fit A4 at 300 DPI while keeping the aspect ratio.
                ratio = min(a4_size_px[0] / width, a4_size_px[1] / height)
                img_resized = img.resize((int(width * ratio), int(height * ratio)),
                                         resample=Image.LANCZOS)

            # mkstemp gives a closed, uniquely named file that can be
            # reopened by name on every platform.
            fd, temp_name = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            temp_paths.append(temp_name)
            img_resized.save(temp_name, format='PNG')

            pdf_writer.add_page()
            # The image is stretched over the whole page.
            pdf_writer.image(temp_name, x=0, y=0, w=pdf_writer.w, h=pdf_writer.h)

        pdf_writer.output(output_path)
    finally:
        # Remove the temporary images, including after a failure.
        for temp_name in temp_paths:
            if os.path.exists(temp_name):
                os.remove(temp_name)


def run(path):
    """Clean every entry of ``path`` into ``./pdf_no_watermark``.

    Failures are printed and the batch continues with the next entry. The
    scratch directory for page images is removed at the end.

    :param path: directory containing the watermarked PDFs
    :return: None
    """
    out_path = './pdf_no_watermark'
    output_folder = './output_images'
    os.makedirs(out_path, exist_ok=True)

    try:
        for file in os.listdir(path):
            pdf_file_in = os.path.join(path, file)
            pdf_file_out = os.path.join(out_path, file)
            os.makedirs(output_folder, exist_ok=True)  # scratch directory
            try:
                image_paths = pdf_to_images(pdf_file_in, output_folder)
                images_to_pdf(image_paths, pdf_file_out)
                print(pdf_file_out)
            except Exception as e:
                print(pdf_file_in, e)
    finally:
        # The scratch directory may not exist if there was nothing to process.
        shutil.rmtree(output_folder, ignore_errors=True)


if __name__ == '__main__':
    run('./pdf_watermark')
最后还是感谢很多大佬的分享, 我只是把他们的代码做了个汇总
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。