赞
踩
- import random, time
- import requests, re
- import datetime
- import mysql.connector
-
- # 定义爬取间隔(minutes)
- interval_time = 15
-
- class HotSearchThread:
- def __init__(self):
- self.curTime = datetime.datetime.now()
- print('[start]开始爬取热搜榜..........')
-
- # 爬取html页面数据
- def getHtml(self):
- url = "https://s.weibo.com/top/summary?cate=realtimehot"
- headers = [
- {
- "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0"
- },
- {
- "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
- },
- {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
- },
- {
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36"
- },
- {
- "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
- }
- ]
- header = headers[random.randint(0,len(headers)-1)]
- response = requests.get(url=url, headers=header)
- return response.content.decode()
-
- # 处理html页面数据,得到绘图所需数据
- def getData(self, html):
- pattern = '<tr.*?<td\s+class="td-01.*?>(\d{1,2})</td>.*?<td\s+class="td-02.*?<a.*?>(.*?)</a>'
- data_rank = re.compile(pattern, re.S).findall(html)
-
- pattern_top = '<tr.*?<td\s+class="td-(\d)1.*?icon-top.*?</td>.*?<td\s+class="td-02.*?<a.*?>(.*?)</a>'
- data_top = re.compile(pattern_top, re.S).findall(html)
-
- data = data_top + data_rank
-
- tail = (datetime.datetime.strftime(self.curTime,'%Y%m%d_%H%M%S'), interval_time)
- for i in range(0,len(data)):
- data[i] += tail
- # print(data)
- print('[success]爬取成功!')
- return data
-
- # 存入数据库
- def saveDAO(self, data):
- # data为 list 类型
- # data[i]为 元组 类型,若len(data)为51,则data[0]为置顶
- # (ranknum, searchItem, time, duration)
- print('[start]开始写入数据库..........')
- conn = mysql.connector.connect(host="127.0.0.1", port=3306, user="WeiboHotSearch",
- password="ws1234.", database="WeiboHotSearch")
- cursor = conn.cursor()
- sql1="SELECT count(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='WeiboHotSearch' AND TABLE_NAME=%s;"
- tablename= "table" + datetime.datetime.strftime(self.curTime,'%Y%m')
- val1=(tablename,)
- cursor.execute(sql1, val1)
- res = cursor.fetchone()
- if res[0]==0:
- sql2="call CreateTablePro(%s);"
- val2=(tablename,)
- cursor.execute(sql2, val2)
- for item in data:
- sql3 = "call SelectDataPro(%s, %s, %s, %s, %s, @cnt);"
- sql4 = "select @cnt;"
- val3 = (tablename, int(item[0]), item[1], item[2], item[3])
- cursor.execute(sql3, val3)
- cursor.execute(sql4)
- cnt = cursor.fetchone()
- if cnt[0]==0:
- sql4="call InsertDataPro(%s, %s, %s, %s, %s)"
- val4=(tablename, int(item[0]), item[1], item[2], item[3])
- cursor.execute(sql4, val4)
- print("[Add ]新增条目:", item)
- else:
- sql5="call UpdateDataPro(%s, %s, %s, %s, %s)"
- val5 = (tablename, int(item[0]), item[1], item[2], item[3])
- cursor.execute(sql5, val5)
- print("[Update]更新条目:", item)
-
- conn.commit()
- cursor.close()
- conn.close()
-
-
- def run(self):
- print("[start] Time: ", datetime.datetime.strftime(self.curTime,'%Y-%m-%d %H:%M:%S'))
- data = self.getData(self.getHtml())
- self.saveDAO(data)
- print("--------------------------------------------------------------")
- print("[over]写入结束!")
- print("--------------------------------------------------------------")
- print("")
- print("")
-
- if __name__ == '__main__':
- print("[Initialized] 开机自启动加载完成!等待运行!")
- time.sleep(60) # 开机等待60秒再运行,防止数据库服务未启动等异常
- while 1:
- HotSearchThread().run()
- time.sleep(interval_time * 60)
防止数据库单个表太大,按月份自动生成表,使用存储过程进行管理
- --- CreateTablePro
- CREATE DEFINER=`WeiboHotSearch`@`%` PROCEDURE `CreateTablePro`(IN `tableName` varchar(50))
- BEGIN
- set @tname = tableName;
- set @sql_create_table = concat(
- 'CREATE TABLE IF NOT EXISTS ', @tname,
- '(num int(2),searchItem varchar(255) primary key,hotTime varchar(255),duration int(5))'
- );
- PREPARE sql_create_table from @sql_create_table;
- EXECUTE sql_create_table;
- END
- --- SelectDataPro
- CREATE DEFINER=`WeiboHotSearch`@`%` PROCEDURE `SelectDataPro`(IN `tableName` varchar(50),IN `num` int(2),IN `searchItem` varchar(255),IN `hotTime` varchar(255),IN `duration` int(5), OUT `cnt` int(5))
- BEGIN
- set @tname = tableName;
- set @num = num;
- set @sItem = searchItem;
- set @hTime = hotTime;
- set @dur = duration;
-
- set @sql_query_data = concat(
- 'select count(@sItem) into @cnt from ', @tname, ' where searchItem = @sItem'
- );
- PREPARE sql_query_data from @sql_query_data;
- EXECUTE sql_query_data;
- set cnt=@cnt;
-
- END
- --- InsertDataPro
- CREATE DEFINER=`WeiboHotSearch`@`%` PROCEDURE `InsertDataPro`(IN `tableName` varchar(50),IN `num` int(2),IN `searchItem` varchar(255),IN `hotTime` varchar(255),IN `duration` int(5))
- BEGIN
- set @tname = tableName;
- set @num = num;
- set @sItem = searchItem;
- set @hTime = hotTime;
- set @dur = 0;
-
- set @sql_insert_data = concat(
- 'insert into ', @tname,' values( @num, @sItem, @hTime, @dur);'
- );
- PREPARE sql_insert_data from @sql_insert_data;
- EXECUTE sql_insert_data;
- END
- --- UpdateDataPro
- CREATE DEFINER=`WeiboHotSearch`@`%` PROCEDURE `UpdateDataPro`(IN `tableName` varchar(50),IN `num` int(2),IN `searchItem` varchar(255),IN `hotTime` varchar(255),IN `duration` int(5))
- BEGIN
- set @tname = tableName;
- set @num = num;
- set @sItem = searchItem;
- set @hTime = hotTime;
- set @dur = duration;
-
- set @sql_insert_data1 = concat(
- 'update ', @tname,' set duration=duration+@dur where searchItem = @sItem'
- );
- PREPARE sql_insert_data1 from @sql_insert_data1;
- EXECUTE sql_insert_data1;
-
-
- set @sql_insert_data = concat(
- 'update ', @tname,' set num=@num, hotTime=@hTime where searchItem = @sItem and num>@num'
- );
- PREPARE sql_insert_data from @sql_insert_data;
- EXECUTE sql_insert_data;
- END
- --- 建表模板
- drop table tablename;
- CREATE table tablename (
- num int(2),
- searchItem varchar(255) primary key,
- hotTime varchar(255),
- duration int(5)
- )
-
- --- 判断数据库中是否存在某张表
- SELECT count(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='WeiboHotSearch' AND TABLE_NAME='test';
-
-
- --- 存储过程的调用
- call CreateTablePro('test')
-
- call InsertDataPro('table202003', 0, '复工复产保卫战', '20200324_165840', 20)
-
- call SelectDataPro('table202003', 0, '复工复产保卫战', '20200324_165840', 20, @cnt);
- select @cnt;
-
- call UpdateDataPro('table202003', 30, '方方', '20200324_165840', 20)
-
- --- select 查询验证
- select * from table202003 where duration = 20
-
-
- --- mysql创建并调用含有out参数的存储过程
- CREATE PROCEDURE sp_add(a int, b int,out c int)
- begin
-
- set c=a+ b;
-
- end;
- --- 调用过程:
- call sp_add (1,2,@a);
- select @a;
-
- # 脚本路径:
- /root/myPyProgram/Weibo.py
- # 脚本日志:
- /root/myPyProgram/logs/Weibo.log
-
- # 设置开机启动
- chmon +x /etc/rd.local
- vim /etc/rd.local
-
- #追加内容:
- /usr/bin/python3 -u /root/myPyProgram/Weibo.py &>> /root/myPyProgram/logs/Weibo.log
-
-
- # 查看进程状态
- ps -aux|grep "Weibo.py" # 会显示开始运行时间,结束时间等
- ps -ef|grep "Weibo.py" # 只显示进程相关信息
-
- # 查询进程id
- ps -ef | grep Weibo.py | grep -v grep | awk '{print $2}'
- # 查询进程id并杀死该进程
- ps -ef | grep Weibo.py | grep -v grep | awk '{print $2}' | xargs kill -9
-
- # 杀死进程
- kill -9 [进程id]
shell 脚本杀死进程 dsp-admin
- echo "开始查询DSP2.0运行的进程编号,查出将其kill"
- dsp_admin_id=`ps -ef | grep dsp-admin | grep -v "grep" | awk '{print $2}'`
- echo $dsp_admin_id
-
- for id in $dsp_admin_id
- do
- kill -9 $id
- echo "killed $id"
- done
1、日志文件创建了,程序执行了,但是日志为0K,tail -f Weibo.log查看为空:
2、/usr/local/bin/python3不存在,需要添加软连接
3、mysql存储过程相关
mysql存储过程用表名做参数 并获取execute执行语句的结果
4、linux查看进程状态
5、Linux中没有rc.local文件的完美解决方法
解决办法步骤一
6、python获取当前系统时间并格式化
7、linux脚本设置开机启动(后台)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。