当前位置:   article > 正文

【Java爬虫】爬取南通大学教务系统成绩计算绩点_java爬取高考分数

java爬取高考分数

  以前写过一个Python版的,但是想做一个JSP网页版的,就又用Java重写了一下。

  具体地址的分析过程在这里,这里简单说一下HttpClient的Get、Post方法的使用。

           1.Get请求方法

  1. //创建一个浏览器客户端
  2. CloseableHttpClient httpClient = HttpClients.createDefault();
  3. //要Get的地址
  4. String url1="http://www.baidu.com";
  5. //创建一个Get请求
  6. HttpGet baidu=new HttpGet(url1);
  7. //用上面创建的浏览器客户端执行该请求
  8. CloseableHttpResponse res=httpClient.execute(baidu);
  9. //用响应创建一个http实体并获得输入流
  10. HttpEntity he=res.getEntity();
  11. InputStream in=he.getContent();
  12. //将获得的流写到本地磁盘
  13. FileOutputStream out=new FileOutputStream("baidu.html'");
  14. byte[] buffer=new byte[1024];
  15. int count=-1;
  16. while((count=in.read(buffer))!=-1)
  17. {
  18. out.write(buffer, 0, count);
  19. }
  20. in.close();
  21. out.close();

   2.Post请求方法

  1. CloseableHttpClient httpClient = HttpClients.createDefault();
  2. String url="http://××××.××××.com?#";
  3. //要提交的参数username,password
  4. List<NameValuePair> list = new ArrayList<NameValuePair>();
  5. list.add(new BasicNameValuePair("Username","Name"));
  6. list.add(new BasicNameValuePair("Password","××××××"));
  7. //转换编码
  8. UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
  9. //创建Post请求
  10. HttpPost httpPost=new HttpPost(url);
  11. //为请求设置参数
  12. httpPost.setEntity(entity);
  13. //获得响应,输入流并写入本地磁盘
  14. CloseableHttpResponse res=httpClient.execute(httpPost);
  15. HttpEntity he=res.getEntity();
  16. InputStream in=he.getContent();
  17. FileOutputStream out=new FileOutputStream("××××.×××");
  18. byte[] buffer=new byte[1024];
  19. int count=-1;
  20. while((count=in.read(buffer))!=-1)
  21. {
  22. out.write(buffer, 0, count);
  23. }
  24. in.close();
  25. out.close();

爬虫的完整代码:

  import java.io.ByteArrayOutputStream;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.UnsupportedEncodingException;
  import java.nio.charset.Charset;
  import java.util.ArrayList;
  import java.util.List;
  import java.util.Scanner;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;
  import org.apache.http.HttpEntity;
  import org.apache.http.NameValuePair;
  import org.apache.http.client.ClientProtocolException;
  import org.apache.http.client.entity.UrlEncodedFormEntity;
  import org.apache.http.client.methods.*;
  import org.apache.http.impl.client.CloseableHttpClient;
  import org.apache.http.impl.client.HttpClients;
  import org.apache.http.message.BasicNameValuePair;
  18. public class spider02 {
  19. public static void main(String[] args) throws ClientProtocolException, IOException
  20. {
  21. @SuppressWarnings("resource")
  22. Scanner cin=new Scanner(System.in);
  23. doon asd=new doon();
  24. asd.getyzm();
  25. String yzm=cin.nextLine(); //测试
  26. String stop="1";
  27. while(!stop.equals("#"))
  28. {
  29. stop=cin.nextLine();
  30. System.out.println(stop);
  31. if(stop.equals("n"))
  32. {
  33. Matcher name=asd.patternname(asd.getname());
  34. while(name.find())
  35. System.out.println(name.group(1));
  36. }
  37. if(stop.equals("s"))
  38. {
  39. Matcher score=asd.patternscore(asd.getscore());
  40. List<lession> les= asd.workjidian(score);
  41. double jdsum=0,xfsum=0;
  42. for(int i=0;i<les.size();i++)
  43. {
  44. jdsum+=les.get(i).getKcxfjd();
  45. xfsum+=Double.valueOf(les.get(i).getXf()).doubleValue();
  46. System.out.println(les.get(i).getKcmc()+"\t"+les.get(i).getZpcj()+"\t"+les.get(i).getXf()+"\t"+les.get(i).getKcxfjd());
  47. }
  48. System.out.println("所修课程学分:"+xfsum);
  49. System.out.println("所修课程学分绩点:"+jdsum);
  50. System.out.println("平均学分绩点:"+jdsum/xfsum);
  51. }
  52. }
  53. }
  54. }
  55. class doon{
  56. private CloseableHttpClient httpClient = HttpClients.createDefault();
  57. public void done(String xh,String sfzh,String kl,String yzm)
  58. {
  59. try {
  60. login(xh, sfzh, kl, yzm); //尝试登陆
  61. getscore(); //获取分数
  62. } catch (ClientProtocolException e) {
  63. e.printStackTrace();
  64. } catch (IOException e) {
  65. e.printStackTrace();
  66. }
  67. }
  68. public String getname()
  69. {
  70. String url="http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx"; //获取个人信息位置
  71. String information="";
  72. //Post请求
  73. List<NameValuePair> list=new ArrayList<NameValuePair>();
  74. list.add(new BasicNameValuePair("xq","2013-2014-1"));
  75. try {
  76. UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
  77. HttpPost post=new HttpPost(url);
  78. post.setEntity(entity);
  79. CloseableHttpResponse res= httpClient.execute(post);
  80. HttpEntity he=res.getEntity();
  81. InputStream in=he.getContent();
  82. //FileOutputStream out=new FileOutputStream("");
  83. byte[] buffer=new byte[1024];
  84. int count=-1;
  85. while((count=in.read(buffer))!=-1)
  86. {
  87. String inf=new String(buffer,0,count);
  88. information+=inf;
  89. }
  90. in.close();
  91. } catch (IOException e) {
  92. // TODO Auto-generated catch block
  93. e.printStackTrace();
  94. }
  95. return information;
  96. }
  97. public void getyzm() throws IOException
  98. {
  99. //获得验证码并写到本地,Get请求
  100. String url1="http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx"; //验证码页面
  101. HttpGet yzm=new HttpGet(url1);
  102. CloseableHttpResponse res=httpClient.execute(yzm);
  103. HttpEntity he=res.getEntity();
  104. InputStream in=he.getContent();
  105. FileOutputStream out=new FileOutputStream("yzm.gif");
  106. byte[] buffer=new byte[1024];
  107. int count=-1;
  108. while((count=in.read(buffer))!=-1)
  109. {
  110. out.write(buffer, 0, count);
  111. }
  112. in.close();
  113. out.close();
  114. }
  115. public void login(String xh,String sfzh,String kl,String yzm) throws ClientProtocolException, IOException
  116. {
  117. //Post请求
  118. String url="http://jwgl.ntu.edu.cn/cjcx/Default.aspx"; //登录页面
  119. List<NameValuePair> list = new ArrayList<NameValuePair>();
  120. list.add(new BasicNameValuePair("__VIEWSTATE","/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));
  121. list.add(new BasicNameValuePair("__VIEWSTATEGENERATOR","6C0FF253"));
  122. list.add(new BasicNameValuePair("xh",xh));
  123. list.add(new BasicNameValuePair("sfzh",sfzh));
  124. list.add(new BasicNameValuePair("kl",kl));
  125. list.add(new BasicNameValuePair("yzm",yzm));
  126. UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
  127. HttpPost httpPost=new HttpPost(url);
  128. httpPost.setEntity(entity);
  129. CloseableHttpResponse res=httpClient.execute(httpPost);
  130. HttpEntity he=res.getEntity();
  131. InputStream in=he.getContent();
  132. FileOutputStream out=new FileOutputStream("ans.html");
  133. byte[] buffer=new byte[1024];
  134. int count=-1;
  135. while((count=in.read(buffer))!=-1)
  136. {
  137. out.write(buffer, 0, count);
  138. }
  139. in.close();
  140. out.close();
  141. }
  142. public String getscore() throws ClientProtocolException, IOException
  143. {
  144. //Post请求
  145. String url="http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx"; //获取分数
  146. List<NameValuePair> list = new ArrayList<NameValuePair>();
  147. list.add(new BasicNameValuePair("start","0"));
  148. list.add(new BasicNameValuePair("pageSize","80"));
  149. UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
  150. HttpPost httpPost=new HttpPost(url);
  151. httpPost.setEntity(entity);
  152. CloseableHttpResponse res=httpClient.execute(httpPost);
  153. HttpEntity he=res.getEntity();
  154. InputStream in=he.getContent();
  155. FileOutputStream out=new FileOutputStream("score.html");
  156. byte[] buffer=new byte[1024];
  157. int count=-1;
  158. String save="";
  159. while((count=in.read(buffer))!=-1)
  160. {
  161. out.write(buffer, 0, count);
  162. String sav=new String(buffer,0,count);
  163. save+=sav;
  164. }
  165. in.close();
  166. out.close();
  167. return save;
  168. }
  169. public Matcher patternscore(String score)
  170. {
  171. //用正则表达式匹配成绩
  172. String reg="\"kcmc\":\"(.*?)\",\"jsxm\":\"(.*?)\",\"xq\":\"(.*?)\",\"xs\":\"(.*?)\",\"xf\":\"(.*?)\",\"zpcj\":\"(.*?)\",\"pscj\":\"(.*?)\",\"qmcj\":\"(.*?)\",\"kcsx\":\"(.*?)\",\"cjid\":\"(.*?)\",\"ksfsm\":\"(.*?)\",\"pxcj\":\"(.*?)\"}";
  173. Pattern p=Pattern.compile(reg);
  174. Matcher m=p.matcher(score);
  175. return m;
  176. }
  177. public Matcher patternname(String name)
  178. {
  179. //匹配个人信息
  180. String reg="<b>(.*?)</b>";
  181. Pattern p=Pattern.compile(reg);
  182. Matcher m=p.matcher(name);
  183. return m;
  184. }
  185. public List<lession> workjidian(Matcher score)
  186. {
  187. //计算绩点
  188. List<lession> les=new ArrayList<lession>();
  189. while(score.find())
  190. {
  191. double xf=0.0;
  192. if(score.group(6).equals("优")) //五级计分
  193. xf=Double.valueOf(score.group(5)).doubleValue()*4.5;
  194. else if(score.group(6).equals("良"))
  195. xf=Double.valueOf(score.group(5)).doubleValue()*3.5;
  196. else if(score.group(6).equals("中"))
  197. xf=Double.valueOf(score.group(5)).doubleValue()*2.5;
  198. else if(score.group(6).equals("及格"))
  199. xf=Double.valueOf(score.group(5)).doubleValue()*1.5;
  200. else if(score.group(6).equals("缓考")||score.group(6).equals("不及格"))
  201. continue;
  202. else if(Double.valueOf(score.group(6)).doubleValue()>=90) //百分计分
  203. xf=((Double.valueOf(score.group(6)).doubleValue()-90)/10+4.0)*Double.valueOf(score.group(5)).doubleValue();
  204. else if(Double.valueOf(score.group(6)).doubleValue()>=80&&Double.valueOf(score.group(6)).doubleValue()<=89)
  205. xf=((Double.valueOf(score.group(6)).doubleValue()-80)/10+3.0)*Double.valueOf(score.group(5)).doubleValue();
  206. else if(Double.valueOf(score.group(6)).doubleValue()>=70&&Double.valueOf(score.group(6)).doubleValue()<=79)
  207. xf=((Double.valueOf(score.group(6)).doubleValue()-70)/10+2.0)*Double.valueOf(score.group(5)).doubleValue();
  208. else if(Double.valueOf(score.group(6)).doubleValue()>=60&&Double.valueOf(score.group(6)).doubleValue()<=69)
  209. xf=((Double.valueOf(score.group(6)).doubleValue()-60)/10+1.0)*Double.valueOf(score.group(5)).doubleValue();
  210. else if(Double.valueOf(score.group(6)).doubleValue()<60)
  211. continue;
  212. les.add(new lession(score.group(1),score.group(2),score.group(3),score.group(4),score.group(5),score.group(6),score.group(7),score.group(8),score.group(9),score.group(10),score.group(11),score.group(12),xf));
  213. // System.out.println(score.group(1)+"\t\t\t\t\t\t"+score.group(2)+"\t"
  214. // +score.group(5)+"\t"+score.group(6)+"学分"+Double.toString(xf));
  215. }
  216. return les;
  217. }
  218. }



声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/311308
推荐阅读
相关标签
  

闽ICP备14008679号