当前位置:   article > 正文

系统对接,抽取数据接口设计_系统取数接口

系统取数接口
  -- Bookkeeping table for the data-extraction job: stores the time window,
  -- paging position and request parameters of an extraction task.
  -- The id default references the sequence extract_task_temp_731_id_seq, which
  -- is not created by this statement and must already exist.
  CREATE TABLE extract_task_temp
  (
      id integer NOT NULL DEFAULT nextval('extract_task_temp_731_id_seq'::regclass),
      task_init_time timestamp with time zone, -- time the extraction task was initialised
      task_current_time timestamp with time zone, -- start of the window currently being extracted
      task_next_time timestamp with time zone, -- start of the next extraction window
      create_time timestamp with time zone DEFAULT now(), -- row creation time
      update_time timestamp with time zone, -- last modification time
      task_type integer, -- task type: 1 = article, 2 = reply
      website_id integer, -- site type id
      start_size integer, -- paging start offset
      limit_size integer, -- number of rows fetched per request
      cid integer, -- customer id
      authors text, -- author nicknames
      interval_time integer, -- interval between windows, in minutes
      CONSTRAINT extract_task_temp_pkey PRIMARY KEY (id)
  );

   

  1. package com.cyyun.mobile.tools;
  2. import java.util.ArrayList;
  3. import java.util.Date;
  4. import java.util.HashMap;
  5. import java.util.List;
  6. import java.util.Map;
  7. import java.util.Set;
  8. import javax.annotation.Resource;
  9. import org.apache.commons.collections.CollectionUtils;
  10. import org.apache.commons.lang3.StringUtils;
  11. import org.apache.commons.lang3.time.DateUtils;
  12. import org.apache.log4j.Logger;
  13. import org.springframework.scheduling.annotation.Scheduled;
  14. import org.springframework.stereotype.Component;
  15. import com.cyyun.mobile.dao.ICommentAccountTempDao;
  16. import com.cyyun.mobile.dao.IExtractTaskTempDao;
  17. import com.cyyun.mobile.pojo.CommentAccountTemp;
  18. import com.cyyun.mobile.pojo.ExtractTaskTemp;
  19. import com.cyyun.mobile.service.ExtractTaskTempService;
  20. import com.cyyun.mobile.tools.httpconnection.HttpUrlConnection;
  21. import com.cyyun.mobile.tools.json.JsonEntity;
  22. import com.cyyun.mobile.tools.json.JsonEntityArray;
  23. import com.twmacinta.util.MD5;
  24. /**
  25. * 抽取数据任务
  26. *
  27. * @author zhangzm
  28. *
  29. */
  30. @Component
  31. public class ExtractTask {
  32. static Logger log = Logger.getLogger(ExtractTask.class);
  33. @Resource
  34. IExtractTaskTempDao iExtractTaskTempDao;
  35. @Resource
  36. ICommentAccountTempDao iCommentAccountTempDao;
  37. @Resource
  38. ExtractTaskTempService extractTaskTempService;
  39. /**
  40. * 获取任务对象
  41. *
  42. * @return
  43. */
  44. public List<ExtractTaskTemp> getExtractTaskTemp() {
  45. List<ExtractTaskTemp> extractTaskTemps = null;
  46. try {
  47. extractTaskTemps = extractTaskTempService
  48. .queryExtractTaskTemp(null);
  49. } catch (Exception e) {
  50. log.error(e.getMessage(), e);
  51. }
  52. return extractTaskTemps;
  53. }
  54. @Scheduled(cron = "0 0/1 * * * ?")
  55. public void execute() {
  56. List<ExtractTaskTemp> extractTaskTemps = getExtractTaskTemp();
  57. if (CollectionUtils.isEmpty(extractTaskTemps)) {
  58. log.warn("extractTaskTemps isEmpty");
  59. return;
  60. }
  61. for (ExtractTaskTemp e : extractTaskTemps) {
  62. if (null != e) {
  63. if ("1".equals(String.valueOf(e.getTaskType()))) {
  64. createTask(e);
  65. } else {
  66. createReplyTask(e);
  67. }
  68. }
  69. }
  70. }
  71. /**
  72. * 获取回复数
  73. *
  74. * @param bean
  75. */
  76. public void createReplyTask(ExtractTaskTemp bean) {
  77. if (bean == null) {
  78. log.warn("SpidTaskSynBean is null ");
  79. return;
  80. }
  81. if (null == bean.getTaskType()) {
  82. log.warn("ExtractTaskTemp getTaskType is null ");
  83. return;
  84. }
  85. initTask(bean);
  86. HttpUrlConnection connection = new HttpUrlConnection();
  87. Map<String, String> dataMap = new HashMap<String, String>();
  88. String url = Constant.GET_ARTICLE_REPLY_URL;
  89. dataMap.put("order", "rid");
  90. dataMap.put("desc", "asc");
  91. dataMap.put("cid", String.valueOf(bean.getCid()));
  92. dataMap.put("limit", String.valueOf(bean.getLimitSize()));
  93. dataMap.put("fid", String.valueOf(bean.getWebsiteId()));
  94. dataMap.put("authors", bean.getAuthors());
  95. dataMap.put("from", String.valueOf(bean.getTaskCurrentTime().getTime()));
  96. dataMap.put("to", String.valueOf(bean.getTaskNextTime().getTime()));
  97. StringBuilder logBuilder = new StringBuilder();
  98. logBuilder.append("开始时间为:").append(bean.getCreateTime())
  99. .append("结束时间为:").append(bean.getTaskNextTime())
  100. .append(" url :" + url).append(" dataMap :" + dataMap);
  101. log.info(logBuilder.toString());
  102. String response = null;
  103. try {
  104. response = connection.readData(dataMap, url);
  105. } catch (Exception e) {
  106. log.error(e.getMessage(), e);
  107. return;
  108. }
  109. if ("[]".equals(response)) {
  110. bean.setAuthors(null);
  111. bean.setStartSize(0);
  112. extractTaskTempService.updateExtractTaskTemp(bean);
  113. log.warn("get response is null " + url + " " + dataMap);
  114. return;
  115. }
  116. JsonEntityArray array = new JsonEntityArray(response);
  117. List<Map<String, Object>> addArticleBeans = new ArrayList<Map<String, Object>>();
  118. if (array != null && array.size() > 0) {
  119. for (int i = 0; i < array.size(); i++) {
  120. JsonEntity jsonE = array.getJsonEntity(i);
  121. String rid = jsonE.getString("rid");
  122. Map<String, Object> map = new HashMap<String, Object>();
  123. map.put("rid", Integer.valueOf(rid));
  124. addArticleBeans.add(map);
  125. }
  126. try {
  127. bean.setStartSize(bean.getStartSize() + addArticleBeans.size());
  128. extractTaskTempService
  129. .updateExtractTaskTempAndArticleReplyTemp(bean,
  130. addArticleBeans);
  131. } catch (Exception e) {
  132. log.error(e);
  133. }
  134. }
  135. }
  136. public String getCommentAccountTempName(
  137. List<CommentAccountTemp> accountTemps) {
  138. StringBuilder builder = new StringBuilder();
  139. try {
  140. if (CollectionUtils.isNotEmpty(accountTemps)) {
  141. for (CommentAccountTemp c : accountTemps) {
  142. if (c != null) {
  143. String name = c.getCommentNickname();
  144. builder.append(name).append(",");
  145. }
  146. }
  147. }
  148. } catch (Exception e) {
  149. log.error(e);
  150. }
  151. String s = builder.toString();
  152. if (s.endsWith(",")) {
  153. s = s.substring(0, s.length() - 1);
  154. }
  155. return s;
  156. }
  157. /**
  158. * 初始化任务,设置 开始时间,结束时间,以及账号表中的任务起始时间。(每一次任务表中的结束时间=账号表中的开始时间) 在账号表中的时间会
  159. * 出现的时间范围是 初始化时间+时间间隔*次数
  160. *
  161. * @param bean
  162. */
  163. public void initTask(ExtractTaskTemp bean) {
  164. if (StringUtils.isNotBlank(bean.getAuthors())) {
  165. return;
  166. }
  167. Map<String, Object> map = new HashMap<String, Object>();
  168. map.put("websiteId", bean.getWebsiteId());
  169. map.put("deleteFlag", 1);
  170. map.put("taskTime", bean.getTaskInitTime());
  171. List<CommentAccountTemp> accountTemps = null;
  172. try {
  173. accountTemps = extractTaskTempService.queryCommentAccountTemp(map);
  174. } catch (Exception e) {
  175. log.error(e);
  176. }
  177. if (CollectionUtils.isEmpty(accountTemps)) {
  178. map.clear();
  179. map.put("websiteId", bean.getWebsiteId());
  180. map.put("deleteFlag", 1);
  181. map.put("taskTime", bean.getTaskNextTime());
  182. try {
  183. accountTemps = extractTaskTempService
  184. .queryCommentAccountTemp(map);
  185. bean.setStartSize(0);// 起始页
  186. bean.setTaskCurrentTime(bean.getTaskNextTime());// 当前处理时间
  187. Date taskNextTime = DateUtils.addMinutes(
  188. bean.getTaskNextTime(), bean.getIntervalTime());
  189. bean.setTaskNextTime(taskNextTime);// 下次处理时间
  190. } catch (Exception e) {
  191. log.error(e);
  192. }
  193. } else {
  194. bean.setStartSize(0);// 起始页
  195. bean.setTaskCurrentTime(bean.getTaskInitTime());// 当前处理时间
  196. Date taskNextTime = DateUtils.addMinutes(bean.getTaskInitTime(),
  197. bean.getIntervalTime());
  198. bean.setTaskNextTime(taskNextTime);// 下次处理时间
  199. }
  200. try {
  201. bean.setAuthors(getCommentAccountTempName(accountTemps));// 设置作者
  202. extractTaskTempService.updateExtractTaskTempAndCommentAccountTemp(
  203. bean, accountTemps);
  204. } catch (Exception e) {
  205. log.error(e);
  206. }
  207. }
  208. /**
  209. * 抽取文章数据
  210. *
  211. * @param bean
  212. */
  213. public void createTask(ExtractTaskTemp bean) {
  214. if (bean == null) {
  215. log.warn("SpidTaskSynBean is null ");
  216. return;
  217. }
  218. if (null == bean.getTaskType()) {
  219. log.warn("ExtractTaskTemp getSysTypeId is null ");
  220. return;
  221. }
  222. initTask(bean);
  223. while (true) {
  224. HttpUrlConnection connection = new HttpUrlConnection();
  225. Map<String, String> dataMap = new HashMap<String, String>();
  226. String url = Constant.GET_ARTICLE_URL;
  227. dataMap.put("action", "full");
  228. dataMap.put("sort", "6");
  229. dataMap.put("order", "1");
  230. dataMap.put("start", String.valueOf(bean.getStartSize()));
  231. dataMap.put("cid", String.valueOf(bean.getCid()));
  232. dataMap.put("limit", String.valueOf(bean.getLimitSize()));
  233. dataMap.put("fid", String.valueOf(bean.getWebsiteId()));
  234. dataMap.put("authors", bean.getAuthors());
  235. dataMap.put("from",
  236. String.valueOf(bean.getTaskCurrentTime().getTime()));
  237. dataMap.put("to", String.valueOf(bean.getTaskNextTime().getTime()));
  238. StringBuilder logBuilder = new StringBuilder();
  239. logBuilder.append("开始时间为:").append(bean.getCreateTime())
  240. .append("结束时间为:").append(bean.getTaskNextTime())
  241. .append(" url :" + url).append(" dataMap :" + dataMap);
  242. log.info(logBuilder.toString());
  243. String response = null;
  244. try {
  245. response = connection.readData(dataMap, url);
  246. } catch (Exception e) {
  247. log.error(e.getMessage(), e);
  248. return;
  249. }
  250. JsonEntity jsonEntity = new JsonEntity(response);
  251. int result = Integer.valueOf(jsonEntity.getString("count"));
  252. if (result == 0) {
  253. log.error("抽取文件数据为0条 ");
  254. bean.setAuthors(null);
  255. bean.setStartSize(0);
  256. extractTaskTempService.updateExtractTaskTemp(bean);
  257. return;
  258. } else {
  259. JsonEntityArray array = jsonEntity.getJsonEntityArray("items");
  260. List<Map<String, Object>> addArticleBeans = new ArrayList<Map<String, Object>>();
  261. if (array != null && array.size() > 0) {
  262. for (int i = 0; i < array.size(); i++) {
  263. JsonEntity jsonE = array.getJsonEntity(i);
  264. String aid = jsonE.getString("aid");
  265. Map<String, Object> map = new HashMap<String, Object>();
  266. map.put("aid", Integer.valueOf(aid));
  267. addArticleBeans.add(map);
  268. }
  269. }
  270. try {
  271. bean.setStartSize(bean.getStartSize()
  272. + addArticleBeans.size());
  273. extractTaskTempService.updateExtractTaskTempAndArticleTemp(
  274. bean, addArticleBeans);
  275. } catch (Exception e) {
  276. log.error(e);
  277. }
  278. }
  279. }
  280. }
  281. public void getArticleByGuid(Set<String> guids) {
  282. if (CollectionUtils.isNotEmpty(guids)) {
  283. for (String guid : guids) {
  284. getArticleByGuid(Constant.CID, guid);
  285. }
  286. }
  287. }
  288. /**
  289. * 抽取文章数据
  290. *
  291. * @param bean
  292. */
  293. public void getArticleByGuid(String cid, String guid) {
  294. if (StringUtils.isBlank(cid)) {
  295. log.warn("cid is null ");
  296. return;
  297. }
  298. if (StringUtils.isBlank(guid)) {
  299. log.warn("cid is null ");
  300. return;
  301. }
  302. HttpUrlConnection connection = new HttpUrlConnection();
  303. Map<String, String> dataMap = new HashMap<String, String>();
  304. String url = Constant.GET_ARTICLE_URL;
  305. dataMap.put("cid", cid);
  306. dataMap.put("guid", guid);
  307. String response = null;
  308. try {
  309. response = connection.readData(dataMap, url);
  310. } catch (Exception e) {
  311. log.error(e.getMessage(), e);
  312. return;
  313. }
  314. // 解析json
  315. JsonEntity jsonEntity = new JsonEntity(response);
  316. int result = Integer.valueOf(jsonEntity.getString("count"));
  317. if (result == 0) {
  318. log.error("获取0条数据");
  319. } else {
  320. }
  321. }
  322. /**
  323. * 抽取文章数据
  324. *
  325. * @param bean
  326. */
  327. public static void testGetArticleByGuid(String cid, String guid) {
  328. if (StringUtils.isBlank(cid)) {
  329. log.warn("cid is null ");
  330. return;
  331. }
  332. if (StringUtils.isBlank(guid)) {
  333. log.warn("cid is null ");
  334. return;
  335. }
  336. HttpUrlConnection connection = new HttpUrlConnection();
  337. Map<String, String> dataMap = new HashMap<String, String>();
  338. dataMap.put("cid", cid);
  339. dataMap.put("guid", guid);
  340. String response = null;
  341. try {
  342. response = connection.readData(dataMap, url);
  343. } catch (Exception e) {
  344. log.error(e.getMessage(), e);
  345. return;
  346. }
  347. // 解析json
  348. JsonEntity jsonEntity = new JsonEntity(response);
  349. int result = Integer.valueOf(jsonEntity.getString("count"));
  350. if (result == 0) {
  351. log.error("获取0条数据");
  352. } else {
  353. }
  354. }
  355. /**
  356. * 获取文章内容
  357. *
  358. * @param aid
  359. * @return
  360. */
  361. public String getArticleContent(Long aid) {
  362. HttpUrlConnection connection = new HttpUrlConnection();
  363. Map<String, String> dataMap = new HashMap<String, String>();
  364. String url = Constant.GET_ARTICLE_CONTENT_URL;
  365. dataMap.put("aid", String.valueOf(aid));
  366. String response = null;
  367. try {
  368. response = connection.readData(dataMap, url);
  369. } catch (Exception e) {
  370. log.error(e.getMessage(), e);
  371. return null;
  372. }
  373. if (StringUtils.isNotBlank(response)) {
  374. // 解析json
  375. JsonEntity jsonEntity = new JsonEntity(response);
  376. return jsonEntity.getString("content");
  377. }
  378. return null;
  379. }
  380. public static void testSpidArticle() {
  381. HttpUrlConnection connection = new HttpUrlConnection();
  382. Map<String, String> dataMap = new HashMap<String, String>();
  383. dataMap.put("start", "0");
  384. dataMap.put("cid", "731");
  385. dataMap.put("limit", "5");
  386. dataMap.put("action", "full");
  387. dataMap.put("sort", "6");
  388. dataMap.put("order", "1");
  389. dataMap.put("author", "品味咖啡");
  390. dataMap.clear();
  391. String response = connection.readData(dataMap, url);
  392. JsonEntity jsonEntity = new JsonEntity(response);
  393. JsonEntityArray array = jsonEntity.getJsonEntityArray("items");
  394. }
  395. public static void testArticleReply() {
  396. HttpUrlConnection connection = new HttpUrlConnection();
  397. Map<String, String> dataMap = new HashMap<String, String>();
  398. dataMap.put("start", "0");
  399. dataMap.put("cid", Constant.CID);
  400. dataMap.put("limit", "10");
  401. dataMap.put("author", "最爱看九爷");
  402. String response = connection.readData(dataMap, url);
  403. // JsonEntity jsonEntity = new JsonEntity(response);
  404. JsonEntityArray array = new JsonEntityArray(response);
  405. // 采集完成
  406. // JsonEntityArray array = jsonEntity.getJsonEntityArray("items");
  407. }
  408. public static String getMD5Url(String url) {
  409. if (StringUtils.isBlank(url)) {
  410. return url;
  411. }
  412. try {
  413. MD5 md5 = new MD5();
  414. md5.Update(url);
  415. return md5.asHex();
  416. } catch (Exception e) {
  417. log.error("md5 加密异常", e);
  418. }
  419. return null;
  420. }
  421. public static void main(String[] args) {
  422. }
  423. }

 

 

 

 

  最好把传输数据接口的参数都配置到数据库中,

  每次任务都记录好当前任务的参数,这样在宕机或者重启时能够保留当前的查询参数,便于下一次查询继续执行。

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号