赞
踩
数据库字段设计
word_id:自动递增
Python 代码如下:
import requests
import pymysql
from bs4 import BeautifulSoup

# Site root that the relative hrefs found on an index page are appended to.
SITE_PREFIX = 'http://www.zd9999.com'

# Seconds to wait for the server before giving up on a single request;
# without a timeout one stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10

# Parameterized insert; the driver escapes the values.
INSERT_SQL = "insert into word(word,oldword,pinyin,radicals,explanation) values(%s,%s,%s,%s,%s)"

# Positions of the <td> cells (inside the fifth <table> of a character page)
# holding: word, oldword, pinyin, radicals, explanation.
WORD_CELL_INDEXES = (1, 4, 8, 10, 12)


def _fetch_text(url):
    """Return the GBK-decoded body of *url*, or None if it cannot be fetched.

    None is returned both for network-level errors and for any HTTP status
    other than 200; the caller is responsible for reporting the failure.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f'{url} request error: {e}')
        return None
    if response.status_code != 200:
        return None
    # Undecodable bytes are dropped rather than aborting the page.
    return response.content.decode('gbk', errors='ignore')


def _parse_word(page_text):
    """Extract the five stored fields from a character page.

    Returns a tuple (word, oldword, pinyin, radicals, explanation), or None
    when the page does not have the expected table/cell layout.
    """
    # Turn <br> tags into newlines so line breaks survive in the cell text.
    normalized = page_text.replace('<br/>', '\n').replace('<br>', '\n')
    soup = BeautifulSoup(normalized, "lxml")
    try:
        cells = soup.find_all('table')[4].find_all('td')
        return tuple(cells[i].text.strip() for i in WORD_CELL_INDEXES)
    except IndexError:
        return None


def _connect():
    """Open a connection to the local MySQL database that stores the words."""
    # NOTE(review): MySQL's 'utf8' charset is limited to 3-byte characters;
    # confirm whether the table needs 'utf8mb4' for rare characters.
    return pymysql.connect(
        host='localhost',
        user='root',
        # password
        password='******',
        # database name
        db='*****',
        charset='utf8'
    )


def _save_word(conn, row):
    """Insert one parsed row, committing on success and rolling back on error."""
    try:
        with conn.cursor() as cur:
            cur.execute(INSERT_SQL, row)
        conn.commit()
        print("插入数据成功;")
    except Exception as e:
        # Best-effort: a failed insert must not stop the rest of the crawl.
        print("插入数据失败:", e)
        conn.rollback()


def downloader(url):
    """Download every character linked from an index page and store it.

    Fetches the index page at *url*, follows each ``<a target="_blank">``
    link that has an href (joined to the site root), parses the character
    page and inserts the extracted fields into the ``word`` table.

    Pages that cannot be fetched or parsed are reported on stdout and
    skipped. Returns None.
    """
    index_text = _fetch_text(url)
    if index_text is None:
        print(f'{url} is failed!')
        return
    print(f'{url} is parsing')

    index_soup = BeautifulSoup(index_text, "lxml")
    # href=True skips anchors without an href, which would otherwise make
    # the string concatenation below fail.
    anchors = index_soup.find_all('a', target="_blank", href=True)
    word_urls = [SITE_PREFIX + anchor['href'] for anchor in anchors]
    if not word_urls:
        return

    # One connection per index page, always closed, instead of one leaked
    # connection per character.
    conn = _connect()
    try:
        for word_url in word_urls:
            page_text = _fetch_text(word_url)
            print(f'{[word_url]} is parsing')
            if page_text is None:
                print(f'{word_url} is failed!')
                continue

            row = _parse_word(page_text)
            if row is None:
                print(f'{word_url} has an unexpected layout, skipped')
                continue

            _save_word(conn, row)
    finally:
        conn.close()


if __name__ == '__main__':
    # The first index page has no number; the remaining ones are index_2 .. index_101.
    downloader('http://www.zd9999.com/zi/index.htm')
    for i in range(2, 102):
        downloader(f'http://www.zd9999.com/zi/index_{i}.htm')
效果如下:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。