当前位置:   article > 正文

B站视频下载(VideoHelper)_getvideohelper下载

getvideohelper下载

 

继续上次的知乎爬虫, 这次开始了哔哩哔哩的爬虫实践;

 

首先介绍下如何下载吧: VideoHelper 里面有三种方式下载b站视频。

 

同样的流程, 还是先抓包, 分析参数, 寻找参数(包括之前的请求包和页面源码), 找出视频真实地址, 然后再模拟请求。

 

抓包时注意几个参数:

aid:每个视频都会有对应的 aid, 包括ep类型的;

cid:弹幕的id, 通过相关api可由cid找到对应的资源列表

ep_id: 就是地址栏上显示的ep类型的id了

 

这里详细的流程我就不介绍了(该软件目前还支持知乎等网站视频, 欢迎star。(*>﹏<*))

 

其中需要注意的是, 模拟发包时有些请求头是不能丢掉的, user-agent我就不说了, 比如Referer;

 

另外我发现网上目前仅存的b站的视频爬虫好像大多不支持ep类型的, 不过我那个最近测试是支持了的, 但是vip专属的也是会直接报错;

 

另外注明:该项目参考了you-get的部分api

 

代码结构我想还是比较清晰的,直接上代码吧:

  1. package website;
  2. import bean.BilibiliBean;
  3. import bean.VideoBean;
  4. import org.dom4j.DocumentException;
  5. import org.dom4j.io.SAXReader;
  6. import org.json.JSONArray;
  7. import org.json.JSONObject;
  8. import org.jsoup.Jsoup;
  9. import org.jsoup.nodes.Document;
  10. import org.jsoup.nodes.Element;
  11. import util.DownloadUtil;
  12. import util.HttpUtil;
  13. import util.MD5Encoder;
  14. import java.io.ByteArrayInputStream;
  15. import java.io.File;
  16. import java.io.IOException;
  17. import java.util.*;
  18. import static util.PrintUtil.println;
  19. /**
  20. * 哔哩哔哩: https://www.bilibili.com/
  21. *
  22. * @author Asche
  23. * @date 2018-10-20 18:02:29
  24. * @github https://github.com/asche910
  25. */
  26. public class Bilibili extends BaseSite {
  27. // from aid to cids
  28. private String ApiGetList = "https://www.bilibili.com/widget/getPageList?aid=";
  29. private String AvApi = "http://interface.bilibili.com/v2/playurl?";
  30. private String EpApi = "http://bangumi.bilibili.com/player/web_api/playurl?";
  31. private String SEC_1 = "94aba54af9065f71de72f5508f1cd42e";
  32. private String SEC_2 = "9b288147e5474dd2aa67085f716c560d";
  33. // quality
  34. private final int RESOLUTION_1080 = 112;
  35. private final int RESOLUTION_720 = 64;
  36. private final int RESOLUTION_480 = 32;
  37. private final int RESOLUTION_360 = 15;
  38. private int quality = RESOLUTION_1080;
  39. // private List<String> urls = new ArrayList<>();
  40. private String playUrl;
  41. private String fileName;
  42. private int timeLength;
  43. private int fileSize = 0;
  44. private int aid;
  45. private int cid;
  46. // 视频类型
  47. private final int AV_VIDEO = 1;
  48. private final int EP_VIDEO = 2;
  49. private final int SS_VIDEO = 3;
  50. private int type = AV_VIDEO;
  51. private boolean isSupported;
  52. // ep的关联系列
  53. private List<BilibiliBean> serialList = new ArrayList<>();
  54. // 是否已经解析
  55. private boolean isResolved;
  56. public Bilibili() {
  57. }
  58. /**
  59. * 先获取信息再决定是否下载
  60. * @param playUrl
  61. * @param outputDir
  62. */
  63. public Bilibili(String playUrl, String outputDir) {
  64. if (!isResolved) {
  65. this.playUrl = playUrl;
  66. String[] strs = playUrl.split("/");
  67. for (String str : strs) {
  68. if (str.matches("av\\d{4,}")) {
  69. aid = Integer.parseInt(str.substring(2));
  70. isSupported = true;
  71. break;
  72. } else if(str.matches("ep\\d{4,}")){
  73. type = EP_VIDEO;
  74. isSupported = true;
  75. break;
  76. } else if(str.matches("ss\\d{4,}")){
  77. type = SS_VIDEO;
  78. isSupported = true;
  79. break;
  80. }
  81. }
  82. try {
  83. switch (type) {
  84. case SS_VIDEO:
  85. case EP_VIDEO:
  86. initEp();
  87. String epApi = generateEpApi(EpApi, cid, quality);
  88. println(epApi);
  89. parseEpApiResponse(epApi);
  90. break;
  91. case AV_VIDEO:
  92. initAv();
  93. String avApi = generateAvApi(AvApi, cid, quality);
  94. println(avApi);
  95. parseAvApiResponse(avApi);
  96. break;
  97. }
  98. } catch (Exception e) {
  99. e.printStackTrace();
  100. }
  101. isResolved = true;
  102. }
  103. }
  104. @Override
  105. public void downloadByUrl(String playUrl, String outputDir) {
  106. println("Bilibili start: ");
  107. this.playUrl = playUrl;
  108. String[] strs = playUrl.split("/");
  109. for (String str : strs) {
  110. if (str.matches("av\\d{4,}")) {
  111. aid = Integer.parseInt(str.substring(2));
  112. isSupported = true;
  113. break;
  114. } else if(str.matches("ep\\d{4,}")){
  115. type = EP_VIDEO;
  116. isSupported = true;
  117. break;
  118. } else if(str.matches("ss\\d{4,}")){
  119. type = SS_VIDEO;
  120. isSupported = true;
  121. break;
  122. }
  123. }
  124. try {
  125. if (!isResolved) {
  126. switch (type) {
  127. case SS_VIDEO:
  128. case EP_VIDEO:
  129. initEp();
  130. String epApi = generateEpApi(EpApi, cid, quality);
  131. println(epApi);
  132. parseEpApiResponse(epApi);
  133. break;
  134. case AV_VIDEO:
  135. initAv();
  136. String avApi = generateAvApi(AvApi, cid, quality);
  137. println(avApi);
  138. parseAvApiResponse(avApi);
  139. break;
  140. }
  141. isResolved = true;
  142. }
  143. println("# Title: " + fileName);
  144. println(" -TimeLength: " + timeLength / 1000 / 60 + ":" + String.format("%02d", timeLength / 1000 % 60));
  145. println(" -File Size: " + fileSize / 1024 / 1024 + " M");
  146. download(urls, outputDir);
  147. } catch (Exception e) {
  148. e.printStackTrace();
  149. }
  150. }
  151. /**
  152. * 内部下载入口
  153. *
  154. * @param videoSrcs
  155. * @param outputDir
  156. */
  157. @Override
  158. public void download(List<String> videoSrcs, String outputDir) throws IOException {
  159. Map<String, List<String>> headerMap = new HashMap<>();
  160. // 缺失Referer会导致453错误
  161. headerMap.put("Referer", Collections.singletonList("http://interface.bilibili.com/v2/playurl?appkey=84956560bc028eb7&cid=59389212&otype=json&qn=3&quality=3&type=&sign=4c841d687bb7e479e3111428c6a4d3b8"));
  162. int index = 0;
  163. for (String src : videoSrcs) {
  164. println("Download: " + ++index + "/" + videoSrcs.size());
  165. String fileDir;
  166. if (videoSrcs.size() == 1) {
  167. fileDir = outputDir + File.separatorChar + fileName.replaceAll("[/|\\\\]", "") + ".flv";
  168. } else {
  169. fileDir = outputDir + File.separatorChar + fileName.replaceAll("[/|\\\\]", "") + "【" + index + "】.flv";
  170. }
  171. DownloadUtil.downloadVideo(src, fileDir, headerMap);
  172. }
  173. println("Download: All Done!");
  174. }
  175. @Override
  176. public VideoBean getInfo() {
  177. VideoBean bean = new VideoBean();
  178. bean.setTitle(fileName);
  179. bean.setTimeLength(timeLength / 1000 / 60 + ":" + String.format("%02d", timeLength / 1000 % 60));
  180. bean.setSize(fileSize / 1024 / 1024);
  181. return bean;
  182. }
  183. public List<BilibiliBean> getSerialList(){
  184. return serialList;
  185. }
  186. /**
  187. * cid, fileName
  188. *
  189. * @throws IOException
  190. */
  191. private void initAv() throws IOException {
  192. String result = HttpUtil.getResponseContent(ApiGetList + aid);
  193. JSONObject jb = (JSONObject) new JSONArray(result).get(0);
  194. cid = jb.getInt("cid");
  195. Document doc = Jsoup.connect(playUrl).get();
  196. Element ele = doc.selectFirst("div[id=viewbox_report]").selectFirst("h1");
  197. if (ele.hasAttr("title"))
  198. fileName = ele.attr("title");
  199. }
  200. /**
  201. * cid, fileName and related eps
  202. *
  203. * @throws IOException
  204. */
  205. private void initEp() throws IOException {
  206. Document doc = Jsoup.connect(playUrl).get();
  207. Element ele = doc.body().child(2);
  208. String preResult = ele.toString();
  209. // println(preResult);
  210. String result = preResult.substring(preResult.indexOf("__=") + 3, preResult.indexOf(";(function()"));
  211. // println(result);
  212. JSONObject object = new JSONObject(result);
  213. JSONObject curEpInfo = object.getJSONObject("epInfo");
  214. fileName = object.getJSONObject("mediaInfo").getString("title");
  215. cid = curEpInfo.getInt("cid");
  216. JSONArray ja = object.getJSONArray("epList");
  217. for (Object obj : ja) {
  218. JSONObject epObject = (JSONObject) obj;
  219. int aid = epObject.getInt("aid");
  220. int cid = epObject.getInt("cid");
  221. int duration = epObject.getInt("duration");
  222. int epId = epObject.getInt("ep_id");
  223. String index = epObject.getString("index");
  224. String indexTitle = epObject.getString("index_title");
  225. BilibiliBean bean = new BilibiliBean(aid, cid, duration, epId, index, indexTitle);
  226. serialList.add(bean);
  227. println(bean.toString());
  228. }
  229. }
  230. /**
  231. * timeLength, fileSize, urls
  232. *
  233. * @param avReqApi
  234. * @throws IOException
  235. */
  236. private void parseAvApiResponse(String avReqApi) throws IOException {
  237. String result = HttpUtil.getResponseContent(avReqApi);
  238. // println(result);
  239. JSONObject jsonObject = new JSONObject(result);
  240. timeLength = jsonObject.getInt("timelength");
  241. JSONArray ja = jsonObject.getJSONArray("durl");
  242. Iterator<Object> iterator = ja.iterator();
  243. while (iterator.hasNext()) {
  244. JSONObject jb = (JSONObject) iterator.next();
  245. String videoSrc = jb.getString("url");
  246. urls.add(videoSrc);
  247. fileSize += jb.getInt("size");
  248. }
  249. }
  250. /**
  251. * timeLength, fileSize, urls
  252. *
  253. * @param epReqApi
  254. * @throws IOException
  255. * @throws DocumentException
  256. */
  257. private void parseEpApiResponse(String epReqApi) throws IOException, DocumentException {
  258. String response = HttpUtil.getResponseContent(epReqApi);
  259. SAXReader reader = new SAXReader();
  260. org.dom4j.Element rootElement = reader.read(new ByteArrayInputStream(response.getBytes("utf-8"))).getRootElement();
  261. timeLength = Integer.parseInt(rootElement.element("timelength").getText().trim());
  262. List<org.dom4j.Element> elements = rootElement.elements("durl");
  263. for (org.dom4j.Element ele : elements) {
  264. int curSize = Integer.parseInt(ele.element("size").getText());
  265. fileSize += curSize;
  266. String url = ele.element("url").getText();
  267. urls.add(url);
  268. }
  269. println(fileName + ": " + fileSize / 1024 / 1024 + "M");
  270. }
  271. /**
  272. * 生成av类型视频下载信息的api请求链接
  273. *
  274. * @param url
  275. * @param cid
  276. * @param quality
  277. * @return
  278. */
  279. private String generateAvApi(String url, int cid, int quality) {
  280. String paramStr = String.format("appkey=84956560bc028eb7&cid=%d&otype=json&qn=%d&quality=%d&type=", cid, quality, quality);
  281. try {
  282. String checkSum = MD5Encoder.md5(paramStr + SEC_1).toLowerCase();
  283. return url + paramStr + "&sign=" + checkSum;
  284. } catch (Exception e) {
  285. e.printStackTrace();
  286. }
  287. return null;
  288. }
  289. /**
  290. * 生成ep类型视频下载信息的api请求链接
  291. *
  292. * @param url
  293. * @param cid
  294. * @param quality
  295. * @return
  296. */
  297. private String generateEpApi(String url, int cid, int quality) {
  298. String paramStr = String.format("cid=%d&module=bangumi&player=1&quality=%d&ts=%s",
  299. cid, quality, System.currentTimeMillis() / 1000 + "");
  300. try {
  301. String checkSum = MD5Encoder.md5(paramStr + SEC_2).toLowerCase();
  302. return url + paramStr + "&sign=" + checkSum;
  303. } catch (Exception e) {
  304. e.printStackTrace();
  305. }
  306. return null;
  307. }
  308. }

 

 

 完整代码位于:

 https://github.com/asche910/VideoHelper 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/900153
推荐阅读
相关标签
  

闽ICP备14008679号