赞
踩
#!/usr/bin/env python
# encoding: utf-8
'''
@author: JHC
@license: None
@contact: JHC000abc@gmail.com
@file: ttt.py
@time: 2022/4/22 23:27
@desc: Replace the uuid and the questions and their answers are written to
       the MySQL database. The uuid is found in the cookies.
'''
import json
import logging
import sys  # noqa: F401  (unused here; kept from the original import list)

# NOTE: the third-party packages (requests, pymysql, tqdm) are imported inside
# the methods that use them, so the pure helpers of this module can be
# imported and unit-tested on a machine without a MySQL driver or network
# stack installed.

logger = logging.getLogger(__name__)

# Labels given to options, in order. Only the first six options of a question
# are labelled and stored (the table has the columns key1..key6).
OPTION_LETTERS = "ABCDEF"

# Seconds to wait for the server before giving up on a request. Without a
# timeout a stalled connection would block the whole run forever.
REQUEST_TIMEOUT = 10

SUBMIT_ANSWER_URL = "https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/submitAnswer?uuid={}"

# Substrings removed from question titles / option texts, in this order.
# NOTE(review): the first entry is a plain space as it appears in the source;
# it may originally have been "&nbsp;" - confirm against real responses.
_TITLE_JUNK = (" ", "<p>", "</p>", "<br>")
_OPTION_JUNK = (" ", "<p>", "</p>")


def _strip_tokens(text, tokens):
    '''Return *text* with every substring in *tokens* removed, one after another.'''
    for token in tokens:
        text = text.replace(token, "")
    return text


class Spider:
    '''Fetch the random-exercise questions of one course, find the accepted
    answer of each question by submitting candidates, and store everything in
    the ``xingce`` table.'''

    def __init__(self, uuid="Vj1vy1A7", courseId="10464858", connection=None):
        '''
        :param uuid: session uuid sent with every request.
        :param courseId: id of the course whose exercises are fetched.
        :param connection: an already opened DB-API connection; when omitted a
            connection to the local ``questionsbank`` database is opened.
        '''
        if connection is None:
            import pymysql.cursors

            # NOTE(review): credentials are hard-coded defaults for a local
            # database; pass ``connection`` to use anything else.
            connection = pymysql.connect(
                host='localhost',
                port=3306,
                user='root',
                password='123456',
                db='questionsbank',
                charset='utf8',
                cursorclass=pymysql.cursors.DictCursor,
            )
        self.con = connection
        self.uuid = uuid
        self.courseId = courseId
        self.randomExerciseStyle = "0"
        self.isFirst = True
        self.params = (
            ('courseId', self.courseId),
            ('randomExerciseStyle', self.randomExerciseStyle),
            ('isFirst', self.isFirst),
            ('uuid', self.uuid),
        )
        self.url_queryAnswerSheet = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryAnswerSheet'
        self.url_queryRandomExerciseDetail = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryRandomExerciseDetail'

    def close(self):
        '''Close the database connection.'''
        self.con.close()

    def _get(self, url, params):
        '''Send a GET request with the shared timeout and return the response.'''
        import requests

        return requests.get(url=url, params=params, timeout=REQUEST_TIMEOUT)

    def _answer_payload(self, answer_content, randomExerciseSortId):
        '''Build the JSON body of a submitAnswer request for this course.'''
        return {
            "data": {
                "answerContent": answer_content,
                "fileList": [],
                "isReview": True,
                "randomExerciseSortId": randomExerciseSortId,
                # Use the configured course instead of a hard-coded id so the
                # spider keeps working when ``courseId`` is changed.
                "courseId": self.courseId,
            }
        }

    def _is_correct(self, answer_content, randomExerciseSortId):
        '''Submit one candidate answer; return True when the response's
        ``rt`` field equals True.'''
        import requests

        res = requests.post(
            SUBMIT_ANSWER_URL.format(self.uuid),
            headers={"Content-Type": "application/json"},
            data=json.dumps(self._answer_payload(answer_content, randomExerciseSortId)),
            timeout=REQUEST_TIMEOUT,
        )
        # ``== True`` (not ``is True``) is kept on purpose: it is the exact
        # comparison the script has always used on the ``rt`` field.
        return res.json()["rt"] == True  # noqa: E712

    def get_exerciseId_list(self):
        '''Return the ids of all exercises on the answer sheet.'''
        sheet = self._get(self.url_queryAnswerSheet, self.params).json()
        return [entry["exerciseId"] for entry in sheet["rt"]["lists"]]

    def get_content(self):
        '''Fetch every question, determine its answer and store it.

        Questions are numbered from 1 in answer-sheet order; that number is
        used as the row id. Questions without detail data or with an unknown
        type are reported on stdout and skipped.
        '''
        from tqdm import tqdm

        exercise_ids = self.get_exerciseId_list()
        for index, questionId in enumerate(tqdm(exercise_ids), start=1):
            params = (
                ('courseId', self.courseId),
                ('questionId', questionId),
                ('times', '2'),
                ('randomExerciseStyle', self.randomExerciseStyle),
                ('uuid', self.uuid),
            )
            response = self._get(self.url_queryRandomExerciseDetail, params)
            detail = response.json()["rt"]  # parse the body once per question
            if detail is None:
                print("异常题号为{}".format(index))
                continue

            sort_id = detail["randomAnswerDetailDto"]["randomExerciseId"]
            option_ids = [option["id"] for option in detail["optionList"]]
            question_type = detail["questionName"]

            if question_type == '单选题':
                answer = self.get_answer_radio(sort_id, option_ids)
                self.radio(response, index, question_type, answer)
            elif question_type == '多选题':
                answer = self.get_answer_multi(sort_id, option_ids)
                self.multi(response, index, question_type, answer)
            elif question_type == '判断题':
                answer = self.get_answer_judge(sort_id)
                self.judge(response, index, question_type, answer)
            else:
                print('异常')

    def _store_question(self, response, INDEX, questionType, answer):
        '''Format one question (options + title) and insert it as a row.'''
        key1, key2, key3, key4, key5, key6 = self.set_opt(response)
        row_id, title, kind = self.process_data(INDEX, questionType, response)
        self.insert_data(id=row_id, title=title, type=kind,
                         key1=key1, key2=key2, key3=key3,
                         key4=key4, key5=key5, key6=key6, answer=answer)

    def judge(self, response, INDEX, questionType, answer):
        '''Store a true/false question.'''
        self._store_question(response, INDEX, questionType, answer)

    def multi(self, response, INDEX, questionType, answer):
        '''Store a multiple-choice question.'''
        self._store_question(response, INDEX, questionType, answer)

    def radio(self, response, INDEX, questionType, answer):
        '''Store a single-choice question.'''
        self._store_question(response, INDEX, questionType, answer)

    def process_data(self, INDEX, questionType, response):
        '''Strip junk characters from the question text.

        :return: ``(id, title, type)`` where id is ``INDEX``, title is the
            cleaned ``rt.content`` of the response and type is
            ``questionType`` unchanged.
        '''
        content = response.json()["rt"]["content"]
        title = _strip_tokens(str(content), _TITLE_JUNK)
        return INDEX, title, questionType

    def set_opt(self, response):
        '''Label the options "A. ...", "B. ..." purely for readability.

        :return: a 6-tuple ``(key1, ..., key6)``; slots without a matching
            option are None. Options beyond the sixth are ignored.
        '''
        optionList = response.json()["rt"]["optionList"]
        labelled = [
            letter + '. ' + _strip_tokens(option["content"], _OPTION_JUNK)
            for letter, option in zip(OPTION_LETTERS, optionList)
        ]
        labelled.extend([None] * (len(OPTION_LETTERS) - len(labelled)))
        return tuple(labelled)

    def insert_data(self, id, title, type, key1=None, key2=None, key3=None,
                    key4=None, key5=None, key6=None, answer=None):
        '''Insert one question row into the ``xingce`` table.

        Storing is best-effort: on failure the transaction is rolled back and
        the error is logged (with traceback) instead of being silently
        dropped, and the run continues with the next question.
        '''
        sql = 'INSERT INTO xingce (id, title,key1,key2,key3,key4,key5,key6,type,answer) VALUES (%s,%s, %s, %s, %s, %s,%s,%s,%s,%s)'
        row = (id, title, key1, key2, key3, key4, key5, key6, type, answer)
        try:
            with self.con.cursor() as cur:
                cur.execute(sql, row)
            self.con.commit()
        except Exception:
            self.con.rollback()
            logger.exception("could not store question %s", id)

    def get_answer_judge(self, randomExerciseSortId):
        '''Find the answer of a true/false question.

        Candidate values 0..9 are submitted in turn until one is accepted.

        :return: "B" when the accepted value is 0, "A" for any other accepted
            value, None when no value is accepted.
        '''
        # NOTE(review): presumably 0 means "false" and is shown as option B -
        # confirm against the option order of real true/false questions.
        for key in range(0, 10):
            if self._is_correct(key, randomExerciseSortId):
                return "B" if key == 0 else "A"
        return None

    def get_answer_radio(self, randomExerciseSortId, answerList):
        '''Find the answer of a single-choice question.

        Each option id is submitted in turn.

        :return: the letter (A-F) of the first accepted option, or None when
            no option among the first six is accepted.
        '''
        for position, option_id in enumerate(answerList):
            if not self._is_correct(option_id, randomExerciseSortId):
                continue
            if position < len(OPTION_LETTERS):
                return OPTION_LETTERS[position]
        return None

    def get_answer_multi(self, randomExerciseSortId, answerList):
        '''Find the answer of a multiple-choice question.

        Every non-empty combination of option ids is submitted (comma
        separated) until one is accepted.

        :return: the letters of the accepted combination joined by ",",
            or None when no combination is accepted.
        '''
        # Map each option id to its letter once; setdefault keeps the first
        # position should an id ever appear twice.
        letter_of = {}
        for letter, option_id in zip(OPTION_LETTERS, answerList):
            letter_of.setdefault(option_id, letter)

        for combination in self.get_key(answerList):
            if not combination:
                continue  # the empty selection is never a candidate
            candidate = ",".join(str(option_id) for option_id in combination)
            if self._is_correct(candidate, randomExerciseSortId):
                return ",".join(letter_of[option_id]
                                for option_id in combination
                                if option_id in letter_of)
        return None

    def get_key(self, answerList):
        '''Return every distinct combination of the given options.

        The result has ``2 ** len(answerList)`` entries, starting with the
        empty combination; entry *i* holds the options whose bit is set in *i*.
        '''
        return [
            [option for bit, option in enumerate(answerList) if (mask >> bit) & 1]
            for mask in range(2 ** len(answerList))
        ]


if __name__ == '__main__':
    sp = Spider()
    try:
        sp.get_content()
    finally:
        # Always release the database connection, even if scraping fails.
        sp.close()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。