
Mobile App Log Analysis System (3)

I. Create the Hive partitioned tables
----------------------------------------------------
1. Create the database
    $hive> create database applogsdb;
2. Create the partitioned tables
    Write the DDL script.
    [applogs_create_table.sql]
    use applogsdb;
    --startup
    CREATE EXTERNAL TABLE ext_startup_logs(
        createdAtMs bigint, appId string, tenantId string, deviceId string,
        appVersion string, appChannel string, appPlatform string, osType string,
        deviceStyle string, country string, province string, ipAddress string,
        network string, carrier string, brand string, screenSize string)
    PARTITIONED BY (ym string, day string, hm string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    STORED AS TEXTFILE;
    --error
    CREATE EXTERNAL TABLE ext_error_logs(
        createdAtMs bigint, appId string, tenantId string, deviceId string,
        appVersion string, appChannel string, appPlatform string, osType string,
        deviceStyle string, errorBrief string, errorDetail string)
    PARTITIONED BY (ym string, day string, hm string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    STORED AS TEXTFILE;
    --event
    CREATE EXTERNAL TABLE ext_event_logs(
        createdAtMs bigint, appId string, tenantId string, deviceId string,
        appVersion string, appChannel string, appPlatform string, osType string,
        deviceStyle string, eventId string, eventDurationSecs bigint,
        paramKeyValueMap Map<string,string>)
    PARTITIONED BY (ym string, day string, hm string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    STORED AS TEXTFILE;
    --page
    CREATE EXTERNAL TABLE ext_page_logs(
        createdAtMs bigint, appId string, tenantId string, deviceId string,
        appVersion string, appChannel string, appPlatform string, osType string,
        deviceStyle string, pageViewCntInSession int, pageId string, visitIndex int,
        nextPage string, stayDurationSecs bigint)
    PARTITIONED BY (ym string, day string, hm string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    STORED AS TEXTFILE;
    --usage
    CREATE EXTERNAL TABLE ext_usage_logs(
        createdAtMs bigint, appId string, tenantId string, deviceId string,
        appVersion string, appChannel string, appPlatform string, osType string,
        deviceStyle string, singleUseDurationSecs bigint, singleUploadTraffic bigint,
        singleDownloadTraffic bigint)
    PARTITIONED BY (ym string, day string, hm string)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
    STORED AS TEXTFILE;
3. Run the applogs_create_table.sql script
    $> hive -f /share/umeng/applogs_create_table.sql
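A quick sanity check after the script runs (a minimal sketch; note the openx JsonSerDe jar must already be on Hive's classpath, e.g. via ADD JAR or hive.aux.jars.path, or later reads of these tables will fail):
    $> hive -e "use applogsdb; show tables;"
    $> hive -e "use applogsdb; show partitions ext_startup_logs;"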
II. Use Linux cron to periodically load the HDFS data into the Hive partitioned tables
----------------------------------------------------------------
1. Explanation
    Scheduling means running a specified task periodically.
2. Install cron on Ubuntu
    apt-get install cron
3. Service commands [Ubuntu]
    $> /usr/sbin/service cron start
    $> /usr/sbin/service cron status
    $> /usr/sbin/service cron restart
    $> /usr/sbin/service cron stop
4. Service commands [CentOS]
    //check status
    $> service crond status
    //stop
    $> service crond stop
    //start
    $> service crond start
5. Configure a scheduled task
    a. In /etc/crontab:
        0-59     0-23    1-31    1-12    0-6
        minute   hour    day     month   weekday
        * * * * * ubuntu source /etc/profile;echo `date` >> ~/1.log
        //the five *'s are wildcards: run the trailing command every minute --> source /etc/profile;echo `date` >> ~/1.log
        //cron's smallest scheduling interval is 1 minute
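After a minute or two, confirm the job actually fired by checking the file it appends to:
    $> tail -n 3 ~/1.log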
6. date arithmetic
    date -d "-3 minute" +%Y%m-%d-%H%M    //the time 3 minutes ago
    date -d "3 minute" +%Y%m-%d-%H%M     //the time 3 minutes from now
    date -d "3 hour" +%Y%m-%d-%H%M       //the time 3 hours from now
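The odd-looking format string %Y%m-%d-%H%M is deliberate: its three '-'-separated fields map onto the ym/day/hm partition keys used below. A quick illustration (timestamp values are examples):
    systime=`date -d "-3 minute" +%Y%m-%d-%H%M`    # e.g. 202401-15-0929
    echo ${systime} | awk -F '-' '{print $1}'      # ym  -> 202401
    echo ${systime} | awk -F '-' '{print $2}'      # day -> 15
    echo ${systime} | awk -F '-' '{print $3}'      # hm  -> 0929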
7. Editing files with sed
    //delete the first line
    $>sed '1d' 1.log
    //delete the last line
    $>sed '$d' 1.log
    //delete a range of lines
    $>sed '1,3d' 1.log
    //delete all lines
    $>sed '1,$d' 1.log
    //p: print -- echo each line and then print it again, so every line appears twice
    $>sed '1,$p' 1.log
    //-n: quiet mode, show only the processed lines -- prints each line exactly once
    $>sed -n '1,$p' 1.log
    //-i: apply the edit to the source file in place [1,$p]
    $>sed -i '1,$p' 1.log
    //print only the lines containing "hello" [/.../p]
    $>sed -n '/hello/p' 1.log
    //append a new line after line 1 [1a]
    $>sed -i '1ahello' 1.log
    //append a new line that keeps its leading character (here a space) [1a\]
    $>sed -i '1a\ hello' 1.log
    //append a new line "hello" below each of lines 1-3 [1,3a] --- append
    $>sed -i '1,3ahello' 1.log
    //replace whole lines 1-2 with "kkk" [1,2c] -- change
    $>sed -i '1,2ckkk' 1.log
    //replace a specific string: substitute "how" for every "hello" [s/../../g]
    $>sed -i 's/hello/how/g' 1.log
8. Write the load script that periodically imports the HDFS files into the Hive partitioned tables
    [~/Downloads/.exportData.sql]
    load data inpath '/data/applogs/startup/${ym}/${day}/${hm}' into table applogsdb.ext_startup_logs partition(ym='${ym}',day='${day}',hm='${hm}');
    load data inpath '/data/applogs/error/${ym}/${day}/${hm}' into table applogsdb.ext_error_logs partition(ym='${ym}',day='${day}',hm='${hm}');
    load data inpath '/data/applogs/event/${ym}/${day}/${hm}' into table applogsdb.ext_event_logs partition(ym='${ym}',day='${day}',hm='${hm}');
    load data inpath '/data/applogs/page/${ym}/${day}/${hm}' into table applogsdb.ext_page_logs partition(ym='${ym}',day='${day}',hm='${hm}');
    load data inpath '/data/applogs/usage/${ym}/${day}/${hm}' into table applogsdb.ext_usage_logs partition(ym='${ym}',day='${day}',hm='${hm}');
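One caveat: load data inpath fails if the source directory for that minute does not exist in HDFS. A defensive wrapper (a sketch, not part of the original scripts) can test the path first:
    # skip the load when no logs arrived in that minute (startup path shown; same idea for the others)
    hdfs dfs -test -d /data/applogs/startup/${ym}/${day}/${hm} && \
        /soft/hive/bin/hive -f ~/Downloads/exportData.sql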
9. Write the driver script -- each run loads exactly one 1-minute slice of data, namely the minute from 3 minutes ago (the lag presumably leaves time for the log files to be fully written to HDFS).
    [~/Downloads/exec.sh]
    #!/bin/bash
    #take the timestamp of 3 minutes ago and split it into the three partition keys
    systime=`date -d "-3 minute" +%Y%m-%d-%H%M`
    ym=`echo ${systime} | awk -F '-' '{print $1}'`
    day=`echo ${systime} | awk -F '-' '{print $2}'`
    hm=`echo ${systime} | awk -F '-' '{print $3}'`
    #substitute the placeholders in the SQL template with the concrete partition values
    cp ~/Downloads/.exportData.sql ~/Downloads/exportData.sql
    sed -i 's/${ym}/'${ym}'/g' ~/Downloads/exportData.sql
    sed -i 's/${day}/'${day}'/g' ~/Downloads/exportData.sql
    sed -i 's/${hm}/'${hm}'/g' ~/Downloads/exportData.sql
    #run hive; the full path is required here, otherwise cron's minimal environment cannot find hive
    /soft/hive/bin/hive -f ~/Downloads/exportData.sql
    rm ~/Downloads/exportData.sql
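Make the script executable and give it one manual run before handing it to cron:
    $> chmod +x ~/Downloads/exec.sh
    $> ~/Downloads/exec.sh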
10. Schedule exec.sh to run automatically once per minute [in production this would typically run once a day, at 2 a.m.; a variant for that is shown below]
    $> sudo nano /etc/crontab
    * * * * * ubuntu source /etc/profile;~/Downloads/exec.sh
    //start the service
    $> /usr/sbin/service cron start
    $> /usr/sbin/service cron status
    $> /usr/sbin/service cron stop
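For reference, the once-a-day-at-02:00 production entry in the same /etc/crontab syntax (user field included) would be:
    0 2 * * * ubuntu source /etc/profile;~/Downloads/exec.sh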
III. Export the web project's war package and deploy it to Tomcat on Ubuntu
---------------------------------------------------------------------
1. Install Tomcat
    a. Download
        apache-tomcat-7.0.72.tar.gz
    b. Untar it
        tar -xzvf ~/Downloads/apache-tomcat-7.0.72.tar.gz -C /soft
    c. Create a symlink
        $>ln -s /soft/apache-tomcat-7.0.72 /soft/tomcat
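A quick check that the unpacked install is usable through the symlink:
    $> /soft/tomcat/bin/version.sh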
2. Export the web project's war package
    a. In the web project's pom.xml, add the plugins and the common-module dependency:
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>com.test</groupId>
        <artifactId>app-logs-collect-web</artifactId>
        <version>1.0-SNAPSHOT</version>
        <packaging>war</packaging>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>2.12.4</version>
                    <configuration>
                        <skipTests>true</skipTests>
                    </configuration>
                </plugin>
                <plugin>
                    <artifactId>maven-war-plugin</artifactId>
                    <version>2.6</version>
                    <configuration>
                        <warSourceDirectory>web</warSourceDirectory>
                        <failOnMissingWebXml>false</failOnMissingWebXml>
                        <excludes>css/*,images/*,js/*,png/*,phone/*</excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
        <dependencies>
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.11</version>
            </dependency>
            <dependency>
                <groupId>com.fasterxml.jackson.core</groupId>
                <artifactId>jackson-core</artifactId>
                <version>2.8.8</version>
            </dependency>
            <dependency>
                <groupId>com.fasterxml.jackson.core</groupId>
                <artifactId>jackson-databind</artifactId>
                <version>2.8.3</version>
            </dependency>
            <dependency>
                <groupId>com.maxmind.db</groupId>
                <artifactId>maxmind-db</artifactId>
                <version>1.0.0</version>
            </dependency>
            <dependency>
                <groupId>org.springframework</groupId>
                <artifactId>spring-webmvc</artifactId>
                <version>4.3.5.RELEASE</version>
            </dependency>
            <dependency>
                <groupId>javax.servlet</groupId>
                <artifactId>servlet-api</artifactId>
                <version>2.5</version>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.24</version>
            </dependency>
            <dependency>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka_2.11</artifactId>
                <version>0.10.0.1</version>
            </dependency>
            <dependency>
                <groupId>com.test</groupId>
                <artifactId>app-analyze-common</artifactId>
                <version>1.0-SNAPSHOT</version>
            </dependency>
        </dependencies>
    </project>
    b. Because the web project depends on the shared common module, install the common module first, so that a fresh build of it lands in the local .m2 repository.
        maven --> install common module ...
    c. Then package the web server project into the war file app-web.war (both Maven commands are sketched below).
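A minimal command-line sketch of steps b and c (the module directory names are assumptions based on the artifactIds):
    $> cd app-analyze-common && mvn clean install      # installs the common jar into ~/.m2
    $> cd ../app-logs-collect-web && mvn clean package # produces the war under target/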
3. Copy the war file to ${tomcat}/webapps on the server
4. Start Tomcat
    $>tomcat/bin/startup.sh
5. Verify
    $>netstat -anop | grep 8080
6. Start flume
    flume-ng agent -f applog.conf -n a1
7. Update the server address the phone app connects to.
    UploadUtil.java, line 21:
    URL url = new URL("http://s100:8080/app-web/coll/index");
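With Tomcat and flume up, the collection endpoint can be smoke-tested from any shell (the app itself POSTs JSON, so even a 405 Method Not Allowed response to this GET confirms the war is deployed and the URL is routed):
    $> curl -i http://s100:8080/app-web/coll/index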
8. At this point the data is collected, uploaded, and lands in Hive.
IV. Hive queries
-----------------------------------------------------------
1. Count a given app's users via Hive [deduplicated by device]
    hive> select count(distinct deviceid) from ext_startup_logs where appid = 'sdk34734';
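Because the table is partitioned by ym/day/hm, adding partition predicates keeps the scan to a single day (partition values here are illustrative):
    $> hive -e "use applogsdb; select count(distinct deviceid) from ext_startup_logs where appid = 'sdk34734' and ym = '202401' and day = '15';"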