赞
踩
目录
安装 Pillow:pip install pillow
安装 pytesseract:pip install pytesseract
Windows下安装
安装包可从 https://digi.bib.uni-mannheim.de/tesseract/ 自行下载,安装时一直点击“下一步”即可。
- # -*- coding: utf-8 -*-
- from PIL import Image
-
- im = Image.open('1.png') # Load the image from file
- img_gray = im.convert('L') # Convert the image to grayscale
-
-
- # ig.save('2.png') 保存灰度化的
-
-
- '''对图片进行2值化'''
def binarization(ig):
    """Binarize a grayscale image in place, thresholding at its mean brightness.

    Pixels strictly brighter than the mean of all pixels become 255; every
    other pixel becomes 0.

    Args:
        ig: Image-like object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))`` / ``putpixel((x, y), value)`` with scalar
            pixel values (e.g. the result of ``Image.convert('L')``).

    Returns:
        The same ``ig`` object, modified in place.
    """
    # Take the dimensions from the argument itself. The original read them
    # from the module-level ``img_gray``, so any other image was processed
    # with the wrong size (or failed when that global did not exist).
    w, h = ig.size
    if w == 0 or h == 0:
        # No pixels to threshold, and the mean below would divide by zero.
        return ig

    total = 0
    for i in range(w):
        for j in range(h):
            total += ig.getpixel((i, j))
    avg_pixel = total / (w * h)

    # Second pass: map every pixel to pure white or pure black.
    for i in range(w):
        for j in range(h):
            is_bright = ig.getpixel((i, j)) > avg_pixel
            ig.putpixel((i, j), 255 if is_bright else 0)
    return ig
-
-
- # img_bz = binarization(img_gray)
- # img_bz.save('3.png')
-
- '''找到噪点范围'''
def point_list(i, j, prange):
    """Yield the coordinates surrounding ``(i, j)`` within ``prange`` pixels.

    The neighbourhood is the full square of side ``2 * prange + 1`` centred
    on ``(i, j)``, with the centre itself excluded, so exactly
    ``(2 * prange + 1) ** 2 - 1`` points are produced. Coordinates are not
    clipped to any image bounds; callers must handle out-of-range points.

    Args:
        i: x coordinate of the centre pixel.
        j: y coordinate of the centre pixel.
        prange: Radius of the neighbourhood in pixels.

    Yields:
        ``(x, y)`` tuples, ordered by x and then by y.
    """
    # ``+ 1`` makes the window symmetric. The original upper bounds stopped
    # one short, skipping the far column and row, which disagreed with the
    # (prange * 2 + 1) ** 2 - 1 neighbour count used by reduce_noise.
    for x in range(i - prange, i + prange + 1):
        for y in range(j - prange, j + prange + 1):
            if (x, y) != (i, j):
                yield (x, y)
-
- '''传入需要降噪的二值化图片'''
def reduce_noise(ig):
    """Remove isolated dark specks from a binarized image, in place.

    A strip of 6% of the width/height along every edge is painted white.
    For each remaining dark pixel (value below 100), the neighbours produced
    by ``point_list`` are inspected; if more than half of a full
    ``(2 * prange + 1)``-square neighbourhood is light (value above 100),
    the pixel is treated as noise and painted white.

    Pixels are updated while the scan is still running, so pixels cleared
    earlier in the scan count as light for the pixels visited afterwards.

    Args:
        ig: Binarized image-like object exposing ``size``, ``getpixel`` and
            ``putpixel`` with scalar pixel values.

    Returns:
        The same ``ig`` object, modified in place.
    """
    w, h = ig.size
    board_rate = 0.06
    prange = 9  # Neighbourhood radius; tune to how many dark pixels surround real strokes.
    # Loop-invariant: half of the neighbour count of a full window.
    noise_threshold = 0.5 * ((prange * 2 + 1) ** 2 - 1)
    for i in range(w):
        for j in range(h):
            if i < w * board_rate or i > w * (1 - board_rate) or j < h * board_rate or j > h * (1 - board_rate):
                ig.putpixel((i, j), 255)
                continue
            if ig.getpixel((i, j)) >= 100:
                continue
            count = 0
            for x, y in point_list(i, j, prange):
                # On small images the border strip is narrower than prange,
                # so neighbours can lie outside the image. The original
                # queried those coordinates directly; treat them as light
                # background instead, like the whitened border strip.
                outside = not (0 <= x < w and 0 <= y < h)
                if outside or ig.getpixel((x, y)) > 100:
                    count += 1
            if count > noise_threshold:
                ig.putpixel((i, j), 255)
    return ig
-
- # ig_nz = reduce_noise(img_bz)
- # ig_nz.save('img_nz.png') #生成完整的图片方便识别
-
-
- '''识别图片'''
- import pytesseract
- pytesseract.pytesseract.tesseract_cmd=r'J:\Tesseract\tesseract.exe' # Location of the Tesseract executable; adjust to the local install path
- zhi=pytesseract.image_to_string('img_nz.png') # Run OCR on the image file 'img_nz.png'
- print(zhi) # Show the recognized text

超级鹰接口代码
3. 下载开发文档
4. 创建 Chaojiying_Client 实例时,第三个参数(soft_id)填写第 2 步创建的软件的 ID
- #!/usr/bin/env python
- # coding:utf-8
-
- import requests
- from hashlib import md5
-
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Every request carries the account name, the MD5 hex digest of the
    password and the software ID, and returns the service's JSON reply as
    parsed by ``requests``.
    """

    _UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    _REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials and the request settings shared by all calls.

        Args:
            username: Chaojiying account name.
            password: Plain-text account password; only its MD5 hex digest
                is kept and sent.
            soft_id: Software ID generated in the Chaojiying user centre.
            timeout: Seconds to wait for each HTTP request, passed to
                ``requests``; ``None`` waits indefinitely (the previous
                behaviour).
        """
        self.username = username
        # The 'pass2' request field carries the MD5 hex digest, not the plain text.
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST ``params`` merged with the account parameters; return the JSON reply."""
        payload = dict(params)
        # Account parameters are applied last, as in the original per-method code.
        payload.update(self.base_params)
        r = requests.post(url, data=payload, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def PostPic(self, im, codetype):
        """Upload an image for recognition.

        Args:
            im: Image content as bytes.
            codetype: Captcha type code; see http://www.chaojiying.com/price.html

        Returns:
            The parsed JSON response.
        """
        return self._post(self._UPLOAD_URL, {'codetype': codetype},
                          files={'userfile': ('ccc.jpg', im)})

    def PostPic_base64(self, base64_str, codetype):
        """Upload a base64-encoded image for recognition.

        Args:
            base64_str: Image content encoded as a base64 string.
            codetype: Captcha type code; see http://www.chaojiying.com/price.html

        Returns:
            The parsed JSON response.
        """
        return self._post(self._UPLOAD_URL,
                          {'codetype': codetype, 'file_base64': base64_str})

    def ReportError(self, im_id):
        """Report a wrongly recognized image.

        Args:
            im_id: Picture ID of the recognition result being reported.

        Returns:
            The parsed JSON response.
        """
        return self._post(self._REPORT_URL, {'id': im_id})
-
-
if __name__ == '__main__':
    # Replace '96001' with a software ID generated under User Center >> Software ID.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰用户名的密码', '96001')
    # Replace 'a.jpg' with the local image path (on Windows the path may need
    # '//' separators). The context manager closes the file handle, which the
    # original bare open(...).read() never did.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type; see the price list on the official site.
    print(chaojiying.PostPic(im, 1902))
    # To send base64 data instead, use chaojiying.PostPic_base64(base64_str, 1902).

- import time
-
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.action_chains import ActionChains
- from chaojiying_Python.chaojiying import Chaojiying_Client
-
- user='' # Chaojiying account name, passed to Chaojiying_Client in img_identify
- password='' # Chaojiying account password, passed to Chaojiying_Client in img_identify
-
-
-
- wb = webdriver.Chrome()
- wb.get("https://aq.yy.com/")
-
- # Select the iframe
- fr = wb.find_element(by=By.XPATH,value='//iframe[@frameborder="0"]')
- wb.switch_to.frame(fr)
-
- # Click the login/register link
- wb.find_element(by=By.XPATH,value='//a[@class="lnRegister"]').click()
-
- # Switch focus to the second window handle
- wb.switch_to.window(wb.window_handles[1])
- print(wb.title)
-
- # Select the iframe
- ifr = wb.find_element(by=By.XPATH,value='//iframe[@frameborder="no"]')
- wb.switch_to.frame(ifr)
-
- # Type the values into the input fields
- wb.find_element(by=By.XPATH, value='//input[@placeholder="输入你的手机号"]').send_keys("17688888888")
- wb.find_element(by=By.XPATH, value='//input[@placeholder="设置你帐号的登录密码"]').send_keys("qwe123456")
- wb.find_element(by=By.XPATH, value='//input[@placeholder="再次输入密码"]').send_keys("qwe123456")
-
- time.sleep(2)
- #-----------------------------------------------------------
-
def get_code():
    """Screenshot the captcha widget and return the path of the saved file.

    Looks up the element with id ``interActiveWrap`` through the module-level
    ``wb`` driver and writes its screenshot to ``yy_register.png``.
    """
    screenshot_path = 'yy_register.png'
    captcha_element = wb.find_element(by=By.ID, value="interActiveWrap")
    captcha_element.screenshot(screenshot_path)
    return screenshot_path
-
- get_code() # Save the captcha screenshot to 'yy_register.png'
-
- time.sleep(2)
-
-
- # 超级鹰识别
def img_identify(img_path):
    """Send a captcha image to Chaojiying and return the click coordinates.

    Uses the module-level ``user`` / ``password`` credentials and captcha
    type 9103. A successful response looks like
    ``{'err_no': 0, 'err_str': 'OK', 'pic_id': '...',
    'pic_str': '80,110|146,95|222,109', 'md5': '...'}``.

    Args:
        img_path: Path of the local captcha image file.

    Returns:
        list: Coordinate strings such as ``['80,110', '146,95', '222,109']``.

    Raises:
        RuntimeError: If the response contains no ``pic_str`` value.
    """
    # The soft_id comes from User Center >> Software ID.
    chaojiying = Chaojiying_Client(username=user, password=password,
                                   soft_id='931146')
    # Close the file deterministically; the original leaked the handle.
    with open(img_path, 'rb') as image_file:
        im = image_file.read()
    resp = chaojiying.PostPic(im, 9103)
    pic_str = resp.get('pic_str')
    if not pic_str:
        # Fail with the service's own reply instead of an opaque
        # AttributeError from calling .split on None.
        raise RuntimeError('Chaojiying returned no coordinates: %r' % (resp,))
    return pic_str.split("|")
-
-
- img_list = img_identify('yy_register.png') # Coordinate strings for the screenshot saved by get_code()
-
- # 点击图片
- def img_click(my_list): # Click each "x,y" entry of my_list on the captcha element, then click the element with class "pw_submit"
- img_element = wb.find_element(by=By.ID, value="interActiveWrap")
- for i in my_list:
- data = i.split(',') # e.g. ['80', '110']
- x = int(data[0]) # e.g. 80
- y = int(data[1]) # e.g. 110
- # Move the mouse to the given offset from the element and click. NOTE(review): the offset origin (top-left vs. centre of the element) presumably depends on the installed Selenium version - confirm against the coordinates the service returns
- ActionChains(wb).move_to_element_with_offset(img_element, xoffset=x, yoffset=y).click().perform()
- time.sleep(2)
- wb.find_element(by=By.XPATH, value='//span[@class="pw_submit"]').click()
-
- img_click(img_list) # Click the recognized positions on the captcha
-
-
- wb.find_element(by=By.XPATH, value='//span[@node-name="JCheck"]').click() # presumably a checkbox on the form - TODO confirm
- wb.find_element(by=By.XPATH, value='//a[@class="btn_blue_v3"]').click() # presumably the form's submit button - TODO confirm
-
-
-
-
-
-
-
-

- """
- 1.打开 https://captcha1.scrape.center/
- 2.点击登录出现滑块验证
- 3.计算滑块需要移动的距离
- 1、知道他们颜色的色差
- a.如果能够得到完整的图片
- b.再得到缺口图片
- 就可以找到移动的结束位置
- c.能够得到滑块图片
- 就可以找到移动的开始位置
- 4.使用selenium的动作链进行操作
- """
- import time
- from PIL import Image
- from selenium import webdriver
- from selenium.webdriver import ActionChains
- from selenium.webdriver.common.by import By
-
def get_captcha():
    """Capture the three screenshots needed to solve the slider captcha.

    Toggles the visibility of the geetest canvases through JavaScript on the
    module-level ``driver`` and screenshots each one in turn, then restores
    the widget so that both the gap image and the slider are visible again.

    Elements are located with ``find_element(by=By.CLASS_NAME, ...)``, the
    same form the rest of this script uses; the ``find_element_by_*``
    helpers the original called are no longer available in Selenium 4.

    Returns:
        tuple: ``(part_imgpath, slice_imgpath, full_imgpath)`` - paths of the
        gap image, the slider image and the complete image.
    """
    # (1) Hide the slider to obtain the image with the gap.
    js_hide_slice = 'document.getElementsByClassName("geetest_canvas_slice")[0].style.display="none"'
    driver.execute_script(js_hide_slice)
    part_imgpath = "./part.png"
    driver.find_element(by=By.CLASS_NAME, value="geetest_canvas_bg").screenshot(part_imgpath)

    # (2) Show the slider and hide the gap image to obtain the slider image.
    js_show_slice = 'document.getElementsByClassName("geetest_canvas_slice")[0].style.display="block"'
    js_hide_part = 'document.getElementsByClassName("geetest_canvas_bg")[0].style.display="none"'
    driver.execute_script(js_show_slice + ";" + js_hide_part)
    slice_imgpath = "./slice.png"
    driver.find_element(by=By.CLASS_NAME, value="geetest_canvas_slice").screenshot(slice_imgpath)

    # (3) Show the complete image and capture it.
    js_show_full1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="block"'
    js_show_full2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.opacity="1"'
    driver.execute_script(js_show_full1 + ";" + js_show_full2)
    full_imgpath = "./full.png"
    driver.find_element(by=By.CLASS_NAME, value="geetest_canvas_fullbg").screenshot(full_imgpath)

    # Restore the widget: hide the complete image, show the gap image again.
    js_hide_full1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="none"'
    js_hide_full2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.opacity="0"'
    js_show_part = 'document.getElementsByClassName("geetest_canvas_bg")[0].style.display="block"'
    driver.execute_script(js_hide_full1 + ";" + js_hide_full2 + ";" + js_show_part)

    return part_imgpath, slice_imgpath, full_imgpath
-
- # 1.计算滑块的x轴坐标
def get_slice_x(img_sclice):
    """Return the x coordinate of the first dark pixel in the slider image.

    The image is scanned column by column, left to right. The first pixel
    whose R + G + B sum is below 600 is taken as the slider's left edge.

    Args:
        img_sclice: Path of the slider screenshot.

    Returns:
        int or None: The x coordinate, or ``None`` if no pixel qualifies.
    """
    # The context manager closes the underlying file, which the original left
    # open. Converting to RGB guarantees 3-channel tuple pixels for any mode.
    with Image.open(img_sclice) as opened:
        img = opened.convert("RGB")
    w, h = img.size
    for x in range(w):
        for y in range(h):
            r, g, b = img.getpixel((x, y))
            if r + g + b < 600:
                print("滑块的坐标", x)
                return x
    return None
-
- # 2.计算缺口图的x轴坐标
def get_bg_x(img_bg, img_full):
    """Return the x coordinate where the gap image first differs from the full image.

    Both images are scanned column by column, left to right. The first pixel
    whose summed absolute R/G/B difference exceeds 180 is taken as the gap's
    left edge.

    Args:
        img_bg: Path of the screenshot with the gap.
        img_full: Path of the screenshot of the complete image.

    Returns:
        int or None: The x coordinate, or ``None`` if the images never
        differ by more than the threshold.
    """
    # Context managers close the files the original left open; converting to
    # RGB guarantees 3-channel tuple pixels for any image mode.
    with Image.open(img_bg) as opened_bg:
        bg = opened_bg.convert("RGB")
    with Image.open(img_full) as opened_full:
        full = opened_full.convert("RGB")
    # Compare only the area both images share, so a size mismatch between the
    # two screenshots cannot cause an out-of-range pixel read.
    w = min(bg.size[0], full.size[0])
    h = min(bg.size[1], full.size[1])
    for x in range(w):
        for y in range(h):
            bg_point = bg.getpixel((x, y))
            full_point = full.getpixel((x, y))
            abs_value = sum(abs(a - b) for a, b in zip(bg_point, full_point))
            if abs_value > 180:
                print("缺口的坐标", x)
                return x
    return None
-
- # 计算滑动的距离
def get_distance(img_slice, img_bg, img_full):
    """Compute how far the slider has to travel horizontally.

    Args:
        img_slice: Path of the slider screenshot.
        img_bg: Path of the screenshot with the gap.
        img_full: Path of the screenshot of the complete image.

    Returns:
        int: Absolute difference between the slider's x coordinate and the
        gap's x coordinate.

    Raises:
        ValueError: If either coordinate could not be located.
    """
    slice_x = get_slice_x(img_slice)
    bg_x = get_bg_x(img_bg, img_full)
    if slice_x is None or bg_x is None:
        # Both helpers return None when nothing is found; report that clearly
        # instead of failing with a TypeError on the subtraction below.
        raise ValueError(
            'could not locate slider (%r) or gap (%r) position' % (slice_x, bg_x))
    data = abs(slice_x - bg_x)
    print('计算距离', data)
    return data
-
- # 模拟滑动
def move_silder(tracks):
    """Drag the slider button ``tracks`` pixels to the right.

    The button is pressed and held first. The drag then covers all but the
    last 10 pixels, pauses, covers the remaining 10 pixels, pauses again and
    releases.

    Args:
        tracks: Total horizontal distance to drag, in pixels (e.g. 100).
    """
    # Locate the slider handle.
    slider_button = driver.find_element(by=By.CLASS_NAME, value='geetest_slider_button')
    chain = ActionChains(driver)
    # Press and hold the handle.
    chain.click_and_hold(slider_button).perform()
    # Queue the two-stage drag, then release.
    (
        chain.pause(0.2)
        .move_by_offset(tracks - 10, 0)
        .pause(0.6)
        .move_by_offset(10, 0)
        .pause(0.6)
        .release()
        .perform()
    )
-
-
- if __name__ == '__main__':
- driver = webdriver.Chrome()
- url = 'https://captcha1.scrape.center/'
- driver.get(url)
- time.sleep(5) # A pause is needed here
- driver.find_element(by=By.CLASS_NAME, value='el-button--primary').click()
- time.sleep(2)
- # Screenshot step for item 3 of the plan (a, b, c)
- img_bg, img_slice, img_full = get_captcha()
- # Compute the distance
- # 1. Compute the slider's x coordinate
- # get_slice_x('slice.png')
- # 2. Compute the gap image's x coordinate
- # get_bg_x('part.png', 'full.png')
- distance = get_distance(img_slice, img_bg, img_full)
- move_silder(distance)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。