赞
踩
官网有教程:
接入指南: https://ai.baidu.com/docs#/Begin/top
Python sdk 文档: http://ai.baidu.com/docs#/OCR-Python-SDK/07883957
# Recognise the text of a local captcha image with the Baidu OCR SDK:
# binarise the picture with Pillow, upload the bytes, print what comes back.
from aip import AipOcr
from PIL import Image

# Your own APPID / API key / secret key.
APP_ID = '11673820'
API_KEY = '8kEGtNlLBCDz6iYGeuNFgGBG'
# The original literal ended with a stray space, which made the key
# 33 characters long; the space is removed here.
SECRET_KEY = 'tgP0bkUFWtRkDvy7VQ0dKz9tCdqDKj8u'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the raw bytes of the file at ``filePath``."""
    with open(filePath, 'rb') as fp:
        return fp.read()


# Pre-process the image before recognition.
with Image.open('captha111.jpg') as source_image:
    # Grayscale alternative: source_image.convert('L')
    # Binarise: convert to a 1-bit black/white image.
    binary_image = source_image.convert('1')
binary_image.save('aa.jpg')

image = get_file_content('aa.jpg')

# Call the general text recognition endpoint with the local image bytes.
# Optional parameters may be passed as a second argument, e.g.
#   client.basicGeneral(image, {"language_type": "ENG",
#                               "detect_direction": "true",
#                               "detect_language": "true",
#                               "probability": "true"})
result = client.basicGeneral(image)
print(result)

# NOTE(review): a failed call presumably returns a dict without
# 'words_result' (e.g. an error payload) -- fall back to an empty list
# instead of raising KeyError; the full response is already printed above.
for word in result.get('words_result', []):
    print(word['words'])
必要文件(云打码平台会提供几个源文件,下载相对应的版本即可,并附有使用教程):
以 古诗文网 为例:
# Log in to so.gushiwen.org: download the captcha through a session,
# recognise it with the local ``Shibie`` helper, then submit the login form
# on the same session and save the returned page.
import requests

from Shibie import Recoginitier

sessions = requests.session()
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
}

# Fetch the captcha with the session so that the cookies it sets are sent
# again with the login request below.
response1 = sessions.get('https://so.gushiwen.org/RandCode.ashx', headers=headers)
# Do not save (and try to recognise) an error page as if it were an image.
response1.raise_for_status()

# Save the image as binary.
with open('yanzhengma.png', 'wb') as fp:
    fp.write(response1.content)

# Recognise the captcha.
file_path = 'yanzhengma.png'
s = Recoginitier()
value = s.esay_recoginition(file_path)
# NOTE(review): presumably str(value) wraps the recognised code in single
# quotes (e.g. a bytes repr); verify against the Shibie module.
value = str(value).split("'")[1]
print(value)

# Form fields of the login request. The original built a *set* of tuples,
# which has no defined order; a dict is what requests documents.
data = {
    'email': '账号',
    'pwd': '密码',
    'code': value,
}

# Submit the login form; the ``from`` query parameter points at the user's
# collection page. The fields go in the request body (``data=``) rather
# than in the URL query string (``params=``) as the original did.
response3 = sessions.post(
    'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx',
    headers=headers,
    data=data,
)

with open('bmx.html', 'w', encoding='utf-8') as fp:
    fp.write(response3.text)

print(response3.status_code)
-
-
-
此平台需要花钱购买题分,不同类型的验证码识别所消耗的题分也不一样,官网也有教学文档:
这里就不详细地介绍了,直接给出一个成功的实例:
下载下来的 API 是 Python 2.x 版本的,需要手动修改成 3.x 版本:
第一个代码块是: 下载的API
第二个代码块是: 以简书为例的一个完整工程
- import requests
- from hashlib import md5
-
-
class Chaojiying(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Only the MD5 hex digest of the password is stored on the instance and
    sent to the service (as the ``pass2`` field).
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_ERROR_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id):
        """Store the credentials and build the per-request defaults.

        username: account name, sent as ``user``.
        password: plain-text password; hashed with MD5 before being stored.
        soft_id: software ID, sent as ``softid``.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Form fields included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def post_pic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        # The image is uploaded as the multipart field ``userfile``.
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(self.UPLOAD_URL, data=params, files=files,
                          headers=self.headers)
        return r.json()

    def report_error(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        Call this when verification fails, so that the service does not
        charge points for that recognition.

        im_id: picture ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(self.REPORT_ERROR_URL, data=params, headers=self.headers)
        return r.json()
-
-
-
- '''
- 下面代码 用于检测 验证码
- '''
- # if __name__ == '__main__':
- #
- #
- # cjy = Chaojiying('账号', '密码', '软件ID')
- # im = open('captcha.jpg', 'rb').read()
- # content = cjy.post_pic(im, 验证码类型)
- # yanzhengma = ''
- # for key,value in content.items():
- # if key == 'pic_str':
- # yanzhengma = value
- # print(yanzhengma)
-
-
- import time
-
- from PIL import Image
- from selenium import webdriver
- from selenium.webdriver import ActionChains
-
- from 点触验证码的识别.chaojiying import Chaojiying
-
-
def crack(max_attempts=None):
    """Solve the click captcha shown in the module-level ``browser``.

    Each attempt screenshots the page, crops the captcha widget, sends the
    crop to Chaojiying, clicks the returned coordinates and presses the
    confirm button. A failed attempt is reported to Chaojiying (so that it
    is not charged) and then retried.

    max_attempts: maximum number of attempts; ``None`` (the default) keeps
        retrying until the captcha is accepted, as the original recursive
        version did, but without growing the call stack.

    Returns True once the captcha is accepted, False if ``max_attempts``
    is exhausted.

    Raises ValueError if the Chaojiying response carries no coordinates.

    NOTE: relies on the global ``browser`` created under the
    ``__main__`` guard, and on ``验证码类型`` (the Chaojiying captcha type),
    which is a placeholder that must be defined before calling.
    """
    # One client is enough for both recognition and error reports.
    cjy = Chaojiying('账号', '密码', '软件ID')

    attempt = 0
    while max_attempts is None or attempt < max_attempts:
        attempt += 1

        # Save a screenshot of the page.
        browser.save_screenshot('222.jpg')

        # Confirm button of the captcha panel.
        button = browser.find_element_by_xpath(xpath='//div[@class="geetest_panel"]/a/div')

        # Position and size of the captcha widget.
        img1 = browser.find_element_by_xpath(xpath='//div[@class="geetest_widget"]')
        location = img1.location
        size = img1.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        print('图片的宽:', size['width'])
        print(top, bottom, left, right)

        # Crop the captcha out of the screenshot and save it.
        # NOTE(review): the 54 px trimmed from the bottom presumably removes
        # the widget's button bar -- confirm against the page layout.
        with Image.open('222.jpg') as screenshot:
            capcha1 = screenshot.crop((left, top, right, bottom - 54))
            capcha1.save('tu1-1.png')

        # Ask Chaojiying for the click coordinates (the response is a dict).
        with open('tu1-1.png', 'rb') as fp:
            im = fp.read()
        content = cjy.post_pic(im, 验证码类型)
        print(content)

        # 'pic_str' has the form "x1,y1|x2,y2|...".
        pic_str = content.get('pic_str')
        if not pic_str:
            raise ValueError('Chaojiying returned no coordinates: %r' % (content,))
        positions = pic_str.split('|')
        locations = [[int(number) for number in group.split(',')] for group in positions]
        print(positions)
        print(locations)

        # Click every returned point, then press the confirm button once.
        for location1 in locations:
            print(location1)
            ActionChains(browser).move_to_element_with_offset(img1, location1[0], location1[1]).click().perform()
            time.sleep(1)
        button.click()
        time.sleep(1)

        # Read the result text to decide whether to retry.
        lower = browser.find_element_by_xpath('//div[@class="geetest_table_box"]/div[2]').text
        print('判断', lower)
        if lower is not None and lower != '验证失败 请按提示重新操作':
            print('登录成功')
            time.sleep(3)
            return True

        time.sleep(3)
        print('登录失败')
        # Report the failed recognition so it is not charged.
        pic_id = content.get('pic_id')
        print('图片id为:', pic_id)
        cjy.report_error(pic_id)

    return False
-
if __name__ == '__main__':
    # Path to the ChromeDriver executable.
    driver_path = 'chromedriver.exe'
    # ``browser`` is a module-level name on purpose: crack() reads it.
    browser = webdriver.Chrome(driver_path)

    browser.get('https://www.jianshu.com/sign_in')
    browser.save_screenshot('lodin.png')

    # Fill in the login form and submit it; this brings up the captcha.
    login = browser.find_element_by_id('sign-in-form-submit-btn')
    username = browser.find_element_by_id('session_email_or_mobile_number')
    password = browser.find_element_by_id('session_password')
    username.send_keys('账号')
    password.send_keys('密码')
    login.click()
    # Wait before calling crack(), which screenshots the page.
    time.sleep(5)
    crack()
-
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。