赞
踩
上班无聊的时候,我偶尔会去百度看一下热搜,于是萌生了一个想法:定时抓取百度热搜,并通过邮件推送给自己。首先找到百度热搜的页面(https://top.baidu.com/board?tab=realtime),话不多说,直接开干。
因为是个SpringBoot工程,所以怎么搭建就不详细地讲解了,不懂的同学麻烦自行百度,直接跳到依赖文件,需要的依赖分别是
pom.xml
<dependencies> <!-- NOTE(review): none of the Java code shown in this article references JPA; confirm spring-boot-starter-data-jpa is actually needed --> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-data-jpa</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-test</artifactId> <scope>test</scope> </dependency> <!-- HttpClient: used to request the target URL --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> </dependency> <!-- Jsoup: used to parse the downloaded page --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.3</version> </dependency> <!-- Important: Alibaba Cloud mail-push (DirectMail) SDK; the e-mail feature depends on it --> <dependency> <groupId>com.aliyun</groupId> <artifactId>dm20151123</artifactId> <version>1.0.0</version> </dependency> <!-- fastjson: used to parse JSON data --> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.66</version> </dependency> </dependencies>
阿里云 环境准备
按步骤依次设置
点击右边的新建域名,根据你购买的域名地址,填入即可
然后就是配置,这一步很重要,没有配置的话是不能发送邮件的,可根据图中箭头所指的配置示例来填写。
首先,在配置之前,你要在域名控制台将你的域名去进行DNS解析,如下图所示
附上我的例子
域名处理完之后,回到上图的邮件推送控制台,在你新创建的域名那里点击配置,配置完毕之后,点击验证即可(可参考我的配置)
至于发信地址和邮件标签比较简单,为了省(tou)篇(lang)幅,就不一一赘述了,只要第一步设置好值就没什么大问题了
(这过程有什么不懂的,欢迎留言区提出)
Email工具类(封装发送邮件的工具类)
import com.aliyun.dm20151123.*; import com.aliyun.dm20151123.models.SingleSendMailRequest; import com.aliyun.dm20151123.models.SingleSendMailResponse; import com.aliyun.tea.TeaException; import com.aliyun.teaopenapi.models.Config; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; @Component public class EmailUtils { private static final Logger logger = LoggerFactory.getLogger(EmailUtils.class); public static Client createClient(String accessKeyId, String accessKeySecret) throws Exception { Config config = new Config().setAccessKeyId(accessKeyId).setAccessKeySecret(accessKeySecret); config.endpoint = "dm.aliyuncs.com"; return new Client(config); } public static void sendEmail(StringBuffer builder) throws Exception { // 这里的 accessKey, accessSecret是你在阿里云设置的,地址如下 // https://usercenter.console.aliyun.com/ Client client = EmailUtils.createClient("accessKey", "accessSecret"); SingleSendMailRequest singleSendMailRequest = new SingleSendMailRequest(); try { singleSendMailRequest .setAccountName("accountName") .setAddressType(1) .setReplyToAddress(true) .setToAddress("email@163.com") .setSubject("百度实时热搜") .setHtmlBody(builder.toString()); client.singleSendMail(singleSendMailRequest); } catch (TeaException e) { logger.error(e.getMessage()); } logger.info("邮件发送成功!!!"); } }
Http工具类(封装发送http请求的工具类)
import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.util.EntityUtils; import org.springframework.stereotype.Component; import java.io.IOException; @Component public class HttpUtils { private final PoolingHttpClientConnectionManager cm; public HttpUtils() { this.cm = new PoolingHttpClientConnectionManager(); // 设置最大连接数 this.cm.setMaxTotal(100); // 设置每个主机的最大连接数 this.cm.setDefaultMaxPerRoute(10); } /** * 根据请求地址下载页面数据 * * @param url * @return 页面数据 */ public String doGetHtml(String url) { // 获取HttpClient对象 CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(this.cm).build(); // 设置hTTPGet请求对象,设置url地址 HttpGet httpGet = new HttpGet(url); // 设置请求信息 httpGet.setConfig(this.getConfig()); // 浏览器表示 httpGet.addHeader("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"); // 传输的类型 httpGet.addHeader("Cookie", "随意打开一个百度网站,在返回的请求地址里面添加cookie信息即可"); httpGet.addHeader("Accept-Language", "zh-CN"); CloseableHttpResponse response = null; try { // 使用HttpClient发起请求,获取响应 response = httpClient.execute(httpGet); // 解析响应,返回结果 if (response.getStatusLine().getStatusCode()==200) { // 判断响应Entity是否不为空,如果不为空就可以使用EntityUtils if (response.getEntity()!=null) { return EntityUtils.toString(response.getEntity(), "utf8"); } } } catch (IOException e) { e.printStackTrace(); } finally { // 关闭response if (response!=null) { try { response.close(); } catch (IOException e) { e.printStackTrace(); } } } // 返回空字符串 return ""; } // 设置请求信息 private RequestConfig getConfig() { return RequestConfig.custom() .setConnectTimeout(1000) // 创建连接的最长时间 .setConnectionRequestTimeout(500) // 获取连接的最长时间 
.setSocketTimeout(10000) // 数据传输的最长时间 .build(); } }
Email工具类是我把阿里云SDK的代码进行封装,Http工具类就是简单封装了Http请求的方法,就不详细讲述了。
具体的实现类
@Component public class BaiduCrawlerTask { private static final Logger logger = LoggerFactory.getLogger(BaiduCrawlerTask.class); @Autowired private HttpUtils httpUtils; @Autowired private EmailUtils emailUtils; private static final String CRAWLER_URL = "https://top.baidu.com/board?tab=realtime"; // 每天 7,12,18,20,23点定时执行 @Scheduled(cron = "0 0 7,12,18,20,23 * * ? ") // @Scheduled(fixedDelay = 1000 * 1000) 可用这行先测试一遍,可以的话,再注释掉用上行的 public void itemTask() throws Exception { String html = this.httpUtils.doGetHtml(CRAWLER_URL); this.parse(html); } private void parse(String html) throws Exception { Document doc = Jsoup.parse(html); // 对新闻数据进行处理 String data = doc.select("._1mm2lDDa53WVJII6NKkYUu").first().childNodes().get(0).toString() .replace("<!--", "").replace("-->", "").substring(8); String resultJson = JSON.parseObject(data).getString("cards"); JSONArray arrayJson = (JSONArray) JSONObject.parse(resultJson); Object content = ((JSONObject) arrayJson.get(0)).get("content"); JSONArray contentArray = (JSONArray) content; String title = ""; String word = ""; String img = ""; String hotScore = ""; String desc = ""; String rawUrl = ""; StringBuilder builder = new StringBuilder(); // 这里拼接成html,是因为如果我试过直接发文本信息, // 阿里云那边当成垃圾邮件给过滤掉,导致邮件发送失败 builder.append("<html>\r\n") .append("<head>\r\n") .append("<meta charset=\"utf-8\">\r\n") .append("<title>百度实时热搜</title>\r\n") .append("</head>\r\n"); int index = 1; for (Object obj : contentArray) { title = ((JSONObject) obj).get("query").toString(); img = (((JSONObject) obj).get("img") == null) ? "" : ((JSONObject) obj).get("img").toString(); hotScore = ((JSONObject) obj).get("hotScore").toString(); desc = (((JSONObject) obj).get("desc") == null) ? 
"" : ((JSONObject) obj).get("desc").toString(); rawUrl = ((JSONObject) obj).get("rawUrl").toString(); builder.append("<div>") .append("<p>") .append(index) .append("、") .append("<a style=\"text-decoration:none;\" href=\"") .append(rawUrl) .append("\"") .append(">") .append(title) .append("</a> - ") .append("<span style=\"color:#ff0000;font-weight:bolder\">") .append(hotScore) .append("</span></p><p>") .append(desc) .append("</p>") .append("<img src=\"") .append(img) .append("\"/>") .append("</div>") .append("\r\n"); index++; if(index == 11){ break; } } this.emailUtils.sendEmail(builder); builder.append("</body>").append("\r\n").append("</html>"); logger.info("数据抓取完成"); } }
对于编写过程中如果有什么疑问,欢迎留言。感谢观看,✿✿ヽ(°▽°)ノ✿
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。