赞
踩
本系统为大学生毕业设计程序,项目涉及技术为:springboot、mybatis、mysql、vue.js、javascript、html、爬虫、maven等等。
Java 爬虫部分的代码实现:
- @PostConstruct
- public void initNovelJob() {
-
- new Thread(() -> {
- logger.info("-------------------------------CrawlerTask 开始小说网站数据-------------------------------");
- Document document = null;
- String link = "https://www.zhetian.org/top/lastupdate.html";
- for (int i = 0; i < 3; i++) {
- try {
- document = Jsoup.connect(link).ignoreContentType(true).post();
- } catch (Exception e) {
- System.out.println(link + ",打开失败,重新尝试:" + i);
- }
- if (null != document) break;
- }
- if (null == document) {
- logger.info("-------------------------------CrawlerTask 爬取小说网站数据失败-------------------------------");
- return;
- }
- Element body = document.body();
- Elements nvs = body.select("body > section > div > div.w-left > div > div.body > ul > li > span.n > a:nth-child(2)");
- for (int n = 0; n < nvs.size(); n++) {
- if (n >= 10) break;
- Document nd = null;
- String nlink = nvs.get(n).attr("abs:href");
- for (int i = 0; i < 3; i++) {
- try {
- nd = Jsoup.connect(nlink).ignoreContentType(true).post();
- } catch (Exception e) {
- System.out.println(nlink + ",打开失败,重新尝试:" + i);
- }
- if (null != nd) break;
- }
- if (null == nd) return;
- String name = nd.select("body > section > div:nth-child(2) > div.w-left > div > div.header.line > h1").text();
- String img = nd.select("body > section > div:nth-child(2) > div.w-left > div > div.body.novel > div.novelinfo > div.novelinfo-r > img").attr("abs:src");
- String zz = nd.select("#author > i:nth-child(1) > a").text();
- String lb = nd.select("#author > i:nth-child(2) > a").text();
- String zt = nd.select("#author > i:nth-child(3)").text();
- String dj = nd.select("#stats > i:nth-child(1)").text();
- String tj = nd.select("#stats > i:nth-child(2)").text();
- String sc = nd.select("#stats > i:nth-child(3)").text();
- String gxsj = nd.select("#update > i:nth-child(1)").text();
- String zxzj = nd.select("#update > i:nth-child(2) > a").text();
- String nrjj = nd.select("#intro").text();
- String uuid = Md5Utils.GetMD5Code(nlink);
- template.update("insert ignore into t_novels_app (name,uuid,link,zz,lb,zt,dj,tj,sc,gxsj,zxzj,nrjj,img)" +
- " values(?,?,?,?,?,?,?,?,?,?,?,?,?)", new Object[]{name, uuid, nlink, zz, lb,
- zt, dj, tj, sc, gxsj, zxzj, nrjj, img});
- logger.info("连接:" + nlink + ",抓取成功");
- Elements select = nd.select("body > section > div.card.mt20.fulldir > div.body > ul > li > a");
- List<Object[]> args = Lists.newArrayList();
- select.stream().forEach(item -> {
- String cname = item.text();
- String clink = item.attr("abs:href");
- if (StringUtils.isEmpty(cname) || StringUtils.isEmpty(clink)) return;
- args.add(new Object[]{uuid, cname, clink});
- });
- template.batchUpdate("insert ignore into t_novels_chapter (nid,name,link)" +
- " values(?,?,?)", args);
- }
- logger.info("-------------------------------CrawlerTask 爬取小说网站数据结束-------------------------------");
- }).start();
- }
-
- @PostConstruct
- public void crawlerNovelDetailJob() {
-
- new Thread(() -> {
- logger.info("-------------------------------CrawlerTask 开始遮天小说详情数据-------------------------------");
- List<Map<String, Object>> cps = template.queryForList("SELECT id,link from t_novels_chapter WHERE ISNULL(content) AND status=1 limit 100");
- if (CollectionUtils.isEmpty(cps)) {
- logger.info("-------------------------------CrawlerTask 没有待执行任务,结束-------------------------------");
- return;
- }
- List<Object[]> args = Lists.newArrayList();
- cps.forEach(item -> {
- Document document = null;
- String link = (String) item.get("link");
- for (int i = 0; i < 3; i++) {
- try {
- document = Jsoup.connect(link).ignoreContentType(true).post();
- } catch (Exception e) {
- System.out.println(link + ",打开失败,重新尝试:" + i);
- }
- if (null != document) break;
- }
- if (null == document) {
- return;
- }
- String getUrl = "https://www.zhetian.org" + Regex.get("get\\(\\'(.*)\\'", document.html(), 1);
- if (StringUtils.isBlank(getUrl)) return;
- Document detail = null;
- for (int i = 0; i < 3; i++) {
- try {
- detail = Jsoup.connect(getUrl).ignoreContentType(true).get();
- } catch (Exception e) {
- System.out.println(link + ",打开失败,重新尝试:" + i);
- }
- if (null != detail) break;
- }
- if (null == detail) return;
- JSONObject object = JSONObject.parseObject(detail.body().html().replaceAll("\\<br\\\\\\ \\/\\>", "<br>"));
- String content = object.getString("info");
- if (StringUtils.isBlank(content)) return;
- args.add(new Object[]{content, item.get("id")});
- });
- if (CollectionUtils.isEmpty(args)) {
- logger.info("-------------------------------CrawlerTask 未抓取到具体章节内容,结束-------------------------------");
- return;
- }
- template.batchUpdate("update t_novels_chapter SET content=? WHERE id=? ", args);
- logger.info("-------------------------------CrawlerTask 爬取小说详情数据结束-------------------------------");
- }).start();
- }
小说推荐算法实现
小说排行榜实现
小说加入书架、用户小说书架管理等
用户小说评论、小说推荐、小说阅读
微博登录、QQ登录
、、、、
代码太多了,就不详细列举了,有需要的可以联系我获取代码……
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。