赞
踩
使用 BeautifulSoup 抓取页面数据,并导出为 JSON,做个记录。
- import requests
- from bs4 import BeautifulSoup
- import json
-
# Target URL pieces: the page for each year is url_pre + year + url_suf.
url_pre = "https://www.baidu.com"
url_suf = "/xx1/x.phtml"
years = ["1", "2", "3", "4", "5"]

# (key fragment, index of the <tr> inside the table) for every scraped row.
# NOTE(review): the original variable names (shouRu / chengBen / liRun)
# suggest these rows are revenue, cost and profit -- confirm against the page.
ROW_SPECS = (("sr", 3), ("cb", 5), ("lr", 18))

# Cells 1..4 of a row hold the four values that are stored; cell 0 is skipped.
VALUE_COUNT = 4

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

output_filename = "data.json"


def store_quarter_values(data_dict, year, key, cells):
    """Copy cells 1..4 of one table row into ``data_dict``.

    Each value is stored under ``f"{year}{key}{n}"`` (n = 1..4) after
    stripping surrounding whitespace and removing thousands separators.

    Args:
        data_dict: Mapping that receives the values (mutated in place).
        year: Year fragment used as the key prefix.
        key: Row fragment of the key, e.g. ``"sr"``.
        cells: Sequence of objects exposing a ``.text`` attribute (the
            ``<td>`` tags of the row).

    Returns:
        True if the row was stored, False if it had fewer than five cells
        and was skipped.
    """
    # Index 4 is read below, so at least five cells are required; the
    # original ``>= 2`` check let short rows through to an IndexError.
    if len(cells) <= VALUE_COUNT:
        return False
    for position in range(1, VALUE_COUNT + 1):
        cleaned = cells[position].text.strip().replace(',', '')
        data_dict[f"{year}{key}{position}"] = cleaned
    return True


def scrape_year(year, data_dict):
    """Download the page for ``year`` and store its rows in ``data_dict``.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        RuntimeError: If the page does not contain the expected table.
    """
    page_url = url_pre + year + url_suf
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', id='id1')
    if table is None:
        raise RuntimeError(f"table 'id1' not found in {page_url}")

    all_tr = table.find_all('tr')
    for key, row_index in ROW_SPECS:
        if row_index >= len(all_tr):
            # The table is shorter than expected; skip this row.
            continue
        store_quarter_values(
            data_dict, year, key, all_tr[row_index].find_all('td')
        )


def main():
    """Scrape every configured year and dump the result to a JSON file."""
    data_dict = {}
    for year in years:
        scrape_year(year, data_dict)

    # Write UTF-8 explicitly: ensure_ascii=False may emit non-ASCII text and
    # the scripts that read this file open it with encoding='utf-8'.
    with open(output_filename, 'w', encoding='utf-8') as json_file:
        json.dump(data_dict, json_file, ensure_ascii=False, indent=4)

    print(f"数据已保存到{output_filename}")


if __name__ == "__main__":
    main()
![](https://csdnimg.cn/release/blogv2/dist/pc/img/newCodeMoreWhite.png)
对数据进行处理。
-
-
- import json
- import pandas as pd
- import matplotlib.pyplot as plt
-
# Year fragments present in data.json; each year has four values per row key.
YEARS = ("1", "2", "3", "4", "5")


def build_totals(data_dict, years=YEARS):
    """Sum the four "sr" and four "cb" values of every year.

    Args:
        data_dict: Mapping with keys ``f"{year}sr{n}"`` and
            ``f"{year}cb{n}"`` (n = 1..4) whose values are convertible
            with ``float``.
        years: Year fragments to aggregate, in output order.

    Returns:
        A dict of three parallel lists keyed ``"year"``, ``"zongshouru"``
        and ``"zongchengben"``, ready to be passed to ``pd.DataFrame``.

    Raises:
        KeyError: If a required key is missing from ``data_dict``.
        ValueError: If a value cannot be converted to ``float``.
    """
    data = {
        "year": [],
        "zongshouru": [],
        "zongchengben": [],
    }
    for year in years:
        sr_sum = sum(float(data_dict[f"{year}sr{i}"]) for i in range(1, 5))
        cb_sum = sum(float(data_dict[f"{year}cb{i}"]) for i in range(1, 5))
        data["year"].append(year)
        data["zongshouru"].append(sr_sum)
        data["zongchengben"].append(cb_sum)
    return data


def plot_totals(df):
    """Draw the yearly totals as a combined line and bar chart."""
    plt.figure(figsize=(10, 6))

    # The DataFrame columns are "zongshouru"/"zongchengben"; the original
    # code indexed df["zsr"]/df["zcb"], which do not exist (KeyError).
    # "zsr"/"zcb" are kept only as the legend labels.
    plt.plot(df["year"], df["zongshouru"], marker='o', label="zsr", color="blue")
    plt.plot(df["year"], df["zongchengben"], marker='o', label="zcb", color="orange")

    # Bars for the same two series, drawn semi-transparent behind the lines.
    plt.bar(df["year"], df["zongshouru"], width=0.4, align='center', alpha=0.5, color="blue")
    plt.bar(df["year"], df["zongchengben"], width=0.4, align='edge', alpha=0.5, color="orange")

    plt.xlabel('year')
    plt.ylabel('amount')
    plt.title('this is title(unit: 100K)')
    plt.legend()
    plt.grid(True)
    plt.show()


def main():
    """Load data.json, aggregate the yearly totals and show the chart."""
    with open('data.json', 'r', encoding='utf-8') as json_file:
        data_dict = json.load(json_file)

    df = pd.DataFrame(build_totals(data_dict))
    plot_totals(df)


if __name__ == "__main__":
    main()
![](https://csdnimg.cn/release/blogv2/dist/pc/img/newCodeMoreWhite.png)
对数据进行处理的部分2:绘制折线图(下面的代码使用的是 matplotlib,而不是 ECharts)。
- # 根据季报数据,计算出公司近五年(2018到2022年)的营业利润,
- # 分年度展示每一年四个季度的营业利润。
- # 要求:年度可以切换,使用折线图展示
-
- import json
- import pandas as pd
- import matplotlib.pyplot as plt
-
# Load the scraped values back from the JSON file.
with open('data.json', 'r', encoding='utf-8') as json_file:
    data_dict = json.load(json_file)

year_labels = ["1", "2", "3", "4", "5"]
quarter_labels = ["Q1", "Q2", "Q3", "Q4"]

# One column with the year labels plus one column per quarter. The value for
# a quarter is read from key f"{year}lr{n}", where n is the digit of the
# quarter label; keys missing from the file fall back to 0.0.
data = {"年份": list(year_labels)}
for quarter in quarter_labels:
    column = []
    for year in year_labels:
        lr_key = f"{year}lr{quarter[1]}"
        column.append(float(data_dict[lr_key]) if lr_key in data_dict else 0.0)
    data[quarter] = column

# print(data)

# Tabular form used by the plotting calls below.
df = pd.DataFrame(data)

# One line per quarter, plotted across the years.
plt.figure(figsize=(10, 6))
for quarter in quarter_labels:
    plt.plot(df["年份"], df[quarter], marker='o', label=quarter)

plt.xlabel('year')
plt.ylabel('lirun')
plt.title('Company 5 years Every Quarters LiRun')
plt.legend()
plt.grid(True)
plt.show()
![](https://csdnimg.cn/release/blogv2/dist/pc/img/newCodeMoreWhite.png)
记录一下python的学习过程。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。