当前位置:   article > 正文

用户画像_setscale(3, 4);

setscale(3, 4);
  1. 一、用户画像项目分析
  2. -------------------------------------------------------
  3. 1.概念
  4. 用户画像也叫用户信息标签化、客户信息。 根据用户的信息和行为动作,用一些标签把用户描绘出来,描绘的标签就是用户画像。
  5. 2.项目简介
  6. 通过手机安装的app,判断机主的年龄区间和性别
  7. 通过对app设置性别和年龄段的相应权重,结合用户每天使用app的时长和次数,推算出用户的性别和年龄段
  8. //核心算法
  9. /** 性别融合 */
  10. public void protraitSex(double newdata1, double newdata2, long factor) {
  11. double sum = (this.data1 + this.data2 + (newdata1 + newdata2) * factor);
  12. if(sum != 0){
  13. this.data1 = (this.data1 + newdata1 * times) / sum;
  14. this.data2 = (this.data2 + newdata2 * times) / sum;
  15. }
  16. }
  17. 3.几个概念
  18. a.appID:用于标记APP,APP的唯一编号,判断终端安装的APP
  19. b.年龄段说明
  20. 名称 ==> 说明
  21. 年龄段1 ==> 24岁以下
  22. 年龄段2 ==> 25-30岁
  23. 年龄段3 ==> 31-35岁
  24. 年龄段4 ==> 36-40岁
  25. 年龄段5 ==> 40岁以上
  26. c.标签库
  27. 标签库的设计,主要是针对应用市场上主流的App,对App进行分类,App分类信息包括:App名称,男女权重信息,各个年龄段的权重信息。
  28. AppID|App名称|男性权重|女性权重|年龄段1|年龄段2|年龄段3|年龄段4|年龄段5
  29. 10001|QQ|0.001|0.001|0|0.2|0.3|0.2|0.3
  30. 10002|飞信|0.001|0.001|0|0.2|0.3|0.2|0.3
  31. 10003|MSN|0.001|0.001|0|0.2|0.3|0.2|0.3
  32. 10004|阿里旺旺|0.001|0.001|0|0.2|0.3|0.2|0.3
  33. 10005|微信|0.001|0.001|0|0.2|0.3|0.2|0.3
  34. 10006|陌陌|0.001|0.001|0|0.2|0.3|0.2|0.3
  35. 10007|米聊|0.001|0.001|0|0.2|0.3|0.2|0.3
  36. 10008|啪啪|0.001|0.001|0|0.2|0.3|0.2|0.3
  37. 10009|飞聊|0.001|0.001|0|0.2|0.3|0.2|0.3
  38. 10010|来往|0.001|0.001|0|0.2|0.3|0.2|0.3
  39. 10011|连我|0.001|0.001|0|0.2|0.3|0.2|0.3
  40. 10012|有你|0.001|0.001|0|0.2|0.3|0.2|0.3
  41. 4.画像数据
  42. 10011|连我|0.001|0.001|0|0.2|0.3|0.2|0.3
  43. 10012|有你|0.001|0.001|0|0.2|0.3|0.2|0.3
  44. 10013|Kakao Talk|0.001|0.001|0|0.2|0.3|0.2|0.3
  45. 10014|Whatsapp|0.001|0.001|0|0.2|0.3|0.2|0.3
  46. 10015|比邻|0.001|0.001|0|0.2|0.3|0.2|0.3
  47. 20016|新浪读书|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  48. 20017|潇湘书院|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  49. 20018|红袖添香|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  50. 20019|纵横中文网|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  51. 20020|掌上书院|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  52. 20021|和阅读|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  53. 20022|掌阅iReader|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  54. 20023|QQ阅读|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  55. 20024|百阅|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  56. 20025|塔读小说|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  57. 20026|Flipboard|0.001|0.001|0.1|0.3|0.3|0.2|0.1
  58. ...
  59. ...
  60. 5.汇总周期
  61. 每天汇总一次,建议当天凌晨统计前一天数据
  62. 6.注意事项
  63. a.必备条件:性别比例、年龄比例的计算必须在识别出AppID后进行,如果AppID编号为空,则不做处理。
  64. b.性别和年龄的判断,都依赖于标签库的设计,也就是用户手机安装的App,根据手机App计算用户的性别和年龄权重信息,最后判断用户的年龄和性别。
  65. 二、开始项目
  66. -----------------------------------------------------
  67. 1.编写用户画像类
  1. package userdraw;
  2. import java.math.BigDecimal;
  /**
   * User portrait: the estimated gender split and age-band split of one
   * subscriber (identified by {@code MDN}) for one statistics day.
   *
   * <p>The portrait is seeded from the first app's weights via
   * {@link #initSex(float, float)} / {@link #initAge(float, float, float, float, float)}
   * and then refined app by app via {@link #protraitSex(double, double, long)} /
   * {@link #protraitAge(double, double, double, double, double, long)}. After every
   * update the gender values sum to 1 and the five age values sum to 1, unless
   * the combined weight was 0, in which case the update is skipped.
   */
  public class UserDraw {
    /** Number of decimal places kept when the portrait is rendered as text. */
    private static final int OUTPUT_SCALE = 3;

    private String statTimeDay; // statistics day
    private String MDN; // phone number
    private double male; // share attributed to "male"
    private double female; // share attributed to "female"
    private double age1; // share of age band 1
    private double age2; // share of age band 2
    private double age3; // share of age band 3
    private double age4; // share of age band 4
    private double age5; // share of age band 5

    /**
     * Renders the portrait as
     * {@code statTimeDay|MDN|male|female|age1|age2|age3|age4|age5|}, with every
     * share rounded half-up to three decimals. The trailing {@code |} is kept
     * because it is part of the established output format.
     */
    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append(statTimeDay).append("|");
      sb.append(MDN).append("|");
      double[] shares = {male, female, age1, age2, age3, age4, age5};
      for (double share : shares) {
        sb.append(round(share)).append("|");
      }
      return sb.toString();
    }

    /**
     * Rounds half-up to {@link #OUTPUT_SCALE} decimals. NaN and infinite values
     * are returned unchanged because {@code BigDecimal} cannot represent them
     * (its constructor would throw {@code NumberFormatException}).
     */
    private static double round(double value) {
      if (Double.isNaN(value) || Double.isInfinite(value)) {
        return value;
      }
      // new BigDecimal(double) is kept on purpose so that rounding results stay
      // identical to the previous setScale(3, 4) call; 4 was ROUND_HALF_UP.
      return new BigDecimal(value)
          .setScale(OUTPUT_SCALE, java.math.RoundingMode.HALF_UP)
          .doubleValue();
    }

    /**
     * Gender fusion: blends another app's gender weights into the current
     * portrait and renormalizes.
     *
     * @param male2   the app's male weight
     * @param female2 the app's female weight
     * @param times   multiplier applied to the app's weights
     */
    public void protraitSex(double male2, double female2, long times) {
      double sum = this.male + this.female + (male2 + female2) * times;
      if (sum != 0) {
        this.male = (this.male + male2 * times) / sum;
        this.female = (this.female + female2 * times) / sum;
      }
    }

    /**
     * Age-band fusion: blends another app's five age-band weights into the
     * current portrait and renormalizes.
     *
     * @param pAge1 the app's weight for age band 1
     * @param pAge2 the app's weight for age band 2
     * @param pAge3 the app's weight for age band 3
     * @param pAge4 the app's weight for age band 4
     * @param pAge5 the app's weight for age band 5
     * @param times multiplier applied to the app's weights
     */
    public void protraitAge(double pAge1, double pAge2, double pAge3, double pAge4, double pAge5, long times) {
      double previous = age1 + age2 + age3 + age4 + age5; // accumulated so far
      double incoming = (pAge1 + pAge2 + pAge3 + pAge4 + pAge5) * times; // current app
      double sum = previous + incoming;
      if (sum != 0) {
        this.age1 = (pAge1 * times + age1) / sum;
        this.age2 = (pAge2 * times + age2) / sum;
        this.age3 = (pAge3 * times + age3) / sum;
        this.age4 = (pAge4 * times + age4) / sum;
        this.age5 = (pAge5 * times + age5) / sum;
      }
    }

    /**
     * Seeds the gender shares from a single app's weights, normalized to sum
     * to 1. Leaves the shares untouched when both weights add up to 0.
     *
     * @param male   the app's male weight
     * @param female the app's female weight
     */
    public void initSex(float male, float female) {
      float sum = male + female;
      if (sum != 0) {
        this.male = male / sum;
        this.female = female / sum;
      }
    }

    /**
     * Seeds the age-band shares from a single app's weights, normalized to sum
     * to 1. Leaves the shares untouched when the weights add up to 0.
     *
     * @param pAge1 the app's weight for age band 1
     * @param pAge2 the app's weight for age band 2
     * @param pAge3 the app's weight for age band 3
     * @param pAge4 the app's weight for age band 4
     * @param pAge5 the app's weight for age band 5
     */
    public void initAge(float pAge1, float pAge2, float pAge3, float pAge4, float pAge5) {
      float sum = pAge1 + pAge2 + pAge3 + pAge4 + pAge5;
      if (sum != 0) {
        this.age1 = pAge1 / sum;
        this.age2 = pAge2 / sum;
        this.age3 = pAge3 / sum;
        this.age4 = pAge4 / sum;
        this.age5 = pAge5 / sum;
      }
    }

    // Accessors. The shares are read-only from outside; they change only
    // through the init* and protrait* methods above.

    public String getStatTimeDay() {
      return statTimeDay;
    }

    public void setStatTimeDay(String statTimeDay) {
      this.statTimeDay = statTimeDay;
    }

    public String getMDN() {
      return MDN;
    }

    public void setMDN(String mDN) {
      MDN = mDN;
    }

    public double getMale() {
      return male;
    }

    public double getFemale() {
      return female;
    }

    public double getAge1() {
      return age1;
    }

    public double getAge2() {
      return age2;
    }

    public double getAge3() {
      return age3;
    }

    public double getAge4() {
      return age4;
    }

    public double getAge5() {
      return age5;
    }
  }
  1. 2.编写主入口函数
  1. public static void main(String[] args) throws Exception {
  2. Configuration conf = new Configuration();
  3. Job job1 = Job.getInstance(conf, "UserDrawMapReduceJob1");
  4. job1.setJarByClass(UserDrawMapReduce.class);
  5. job1.setMapperClass(MyMap.class);
  6. job1.setReducerClass(MyReduce.class);
  7. job1.setMapOutputKeyClass(Text.class);
  8. job1.setMapOutputValueClass(TextArrayWritable.class);
  9. job1.setOutputKeyClass(Text.class);
  10. job1.setOutputValueClass(Text.class);
  11. job1.setInputFormatClass(TextInputFormat.class);
  12. job1.setOutputFormatClass(TextOutputFormat.class);
  13. FileInputFormat.addInputPath(job1, new Path("file:///D:\\share\\project\\userdraw\\data"));// 输入路径
  14. FileOutputFormat.setOutputPath(job1, new Path("file:///D:\\share\\project\\userdraw\\out"));// 输出路径
  15. Boolean state1 = job1.waitForCompletion(true);
  16. System.out.println("job1执行成功!!!");
  17. if (state1) {
  18. conf = new Configuration();
  19. Job job2 = Job.getInstance(conf, "UserDrawMapReduceJob2");
  20. job2.setJarByClass(UserDrawMapReduce.class);
  21. job2.setMapperClass(MyMap2.class);
  22. job2.setReducerClass(MyReduce2.class);
  23. job2.setMapOutputKeyClass(Text.class);
  24. job2.setMapOutputValueClass(Text.class);
  25. job2.setOutputKeyClass(Text.class);
  26. job2.setOutputValueClass(Text.class);
  27. job2.setInputFormatClass(TextInputFormat.class);
  28. job2.setOutputFormatClass(TextOutputFormat.class);
  29. FileInputFormat.addInputPath(job2, new Path("file:///D:\\share\\project\\userdraw\\out"));// 输入路径
  30. FileOutputFormat.setOutputPath(job2, new Path("file:///D:\\share\\project\\userdraw\\out2"));// 输出路径
  31. Boolean state2 = job2.waitForCompletion(true);
  32. System.out.println("job2执行成功!!!");
  33. if (state2) {
  34. conf = new Configuration();
  35. // 设置zookeeper
  36. conf.set(UserDrawMapReduce.conf.consite, UserDrawMapReduce.conf.hbaseip);
  37. // 设置hbase表名称
  38. conf.set(TableOutputFormat.OUTPUT_TABLE, UserDrawMapReduce.conf.tableDraw);
  39. // 将该值改大,防止hbase超时退出
  40. conf.set(UserDrawMapReduce.conf.coftime, UserDrawMapReduce.conf.time);
  41. Job job3 = Job.getInstance(conf,
  42. "UserDrawPutInHbase");
  43. job3.setJarByClass(UserDrawMapReduce.class);
  44. TableMapReduceUtil.addDependencyJars(job3);
  45. FileInputFormat.setInputPaths(job3, new Path("file:///D:\\share\\project\\userdraw\\out2"));
  46. job3.setMapperClass(UserDrawPutInHbaseMap.class);
  47. job3.setMapOutputKeyClass(Text.class);
  48. job3.setMapOutputValueClass(Text.class);
  49. job3.setReducerClass(UserDrawPutInHbaseReduce.class);
  50. job3.setOutputFormatClass(TableOutputFormat.class);
  51. job3.waitForCompletion(true);
  52. }
  53. }
  54. }
  1. 3.编写MR作业类
  2. a.第一次MR作业
  1. public class UserDrawMapReduce {
  2. public static Config conf = new Config();
  3. public static class MyMap extends Mapper<LongWritable, Text, Text, TextArrayWritable> {
  4. Text k = new Text();
  5. public void map(LongWritable key, Text value, Context context)
  6. throws IOException, InterruptedException {
  7. //一行文本
  8. String line = value.toString();
  9. //通过 | 进行切割
  10. String[] dataArray = line.split(conf.Separator);
  11. //唯一标识:手机号+appid
  12. String uiqkey = dataArray[Integer.parseInt(conf.MDN)]
  13. + dataArray[Integer.parseInt(conf.appID)]; // MDN + appID
  14. String[] val = new String[5];
  15. //时间
  16. String timenow = dataArray[Integer.parseInt(conf.Date)];
  17. SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
  18. val[0] = sdf.format(Long.parseLong(timenow));//时间
  19. val[1] = dataArray[Integer.parseInt(conf.MDN)];// 手机号
  20. val[2] = dataArray[Integer.parseInt(conf.appID)];// appID
  21. val[3] = "1";// 计数
  22. val[4] = dataArray[Integer.parseInt(conf.ProcedureTime)];// 使用时长
  23. k.set(uiqkey);
  24. context.write(k, new TextArrayWritable(val));
  25. }
  26. }
  27. /**
  28. * 统计 每个软件使用的总次数和总时间,按照unikey进行聚合
  29. */
  30. public static class MyReduce extends Reducer<Text, TextArrayWritable, Text, Text> {
  31. Text v = new Text();
  32. public void reduce(Text key, Iterable<TextArrayWritable> values,
  33. Context context) throws IOException, InterruptedException {
  34. long sum = 0;
  35. int count = 0;
  36. String[] res = new String[5];
  37. boolean flg = true;
  38. for (TextArrayWritable t : values) {
  39. String[] vals = t.toStrings();
  40. if (flg) {
  41. res = vals;
  42. }
  43. if (vals[3] != null) {
  44. count = count + 1;
  45. }
  46. if (vals[4] != null) {
  47. sum += Long.valueOf(vals[4]);
  48. }
  49. }
  50. res[3] = String.valueOf(count);
  51. res[4] = String.valueOf(sum);
  52. StringBuffer sb = new StringBuffer();
  53. sb.append(res[0]).append("|");// 时间
  54. sb.append(res[1]).append("|");// 手机号
  55. sb.append(res[2]).append("|");// appID
  56. sb.append(res[3]).append("|");// 计数
  57. sb.append(res[4]);// 使用时长
  58. v.set(sb.toString());
  59. context.write(null, v);
  60. }
  61. }
  62. }
  1. b.第二次MR作业
  1. package userdrawmr;
  2. import java.io.IOException;
  3. import java.util.HashMap;
  4. import java.util.Map;
  5. import java.util.Set;
  6. import org.apache.hadoop.io.LongWritable;
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapreduce.Mapper;
  9. import org.apache.hadoop.mapreduce.Reducer;
  10. import userdraw.UserDraw;
  11. import util.LoadHdfsTable;
  12. public class UserDrawMapReduce2 {
  13. public static class MyMap2 extends Mapper<LongWritable, Text, Text, Text> {
  14. Text k = new Text();
  15. //更改key为手机号
  16. public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  17. String line = value.toString();
  18. String[] dataArray = line.split("\\|");
  19. String newkey = dataArray[1] ; // MDN手机号
  20. k.set(newkey);
  21. context.write(k, value);
  22. }
  23. }
  24. public static class MyReduce2 extends Reducer<Text, Text, Text, Text> {
  25. Map<String, String[]> appMap = LoadHdfsTable.getAppMap();
  26. Text v = new Text();
  27. public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
  28. Map<String, UserDraw> userDrawMap = new HashMap<String, UserDraw>();
  29. Set<String> keySet = userDrawMap.keySet();
  30. String keyMDN = null;
  31. for (Text t : values) {
  32. String[] dataArray = t.toString().split("\\|");
  33. keyMDN = dataArray[1]; // 用户MDN
  34. String appID = dataArray[2]; // APPID
  35. // 根据appID获取对应的标签信息
  36. if (appID.length() > 0) { // appID不能为空
  37. if (appMap.get(appID) == null) {
  38. continue;
  39. }
  40. String favourite = appMap.get(appID)[2];
  41. float male = Float.parseFloat(appMap.get(appID)[1]);
  42. float female = Float.parseFloat(appMap.get(appID)[2]);
  43. float age1 = Float.parseFloat(appMap.get(appID)[3]);
  44. float age2 = Float.parseFloat(appMap.get(appID)[4]);
  45. float age3 = Float.parseFloat(appMap.get(appID)[5]);
  46. float age4 = Float.parseFloat(appMap.get(appID)[6]);
  47. float age5 = Float.parseFloat(appMap.get(appID)[7]);
  48. long times = Long.parseLong(dataArray[4]);
  49. if (userDrawMap.containsKey(keyMDN)==true) {
  50. UserDraw userDraw = userDrawMap.get(keyMDN);
  51. // 性别权重
  52. userDraw.protraitSex(male, female, times);
  53. // 年龄段权重
  54. userDraw.protraitAge(age1, age2, age3, age4, age5, times);
  55. } else {
  56. userDrawMap.put(keyMDN, createDrawData(dataArray, favourite, male, female, age1, age2, age3, age4, age5, times));
  57. }
  58. }
  59. }
  60. for (String keys : keySet) {
  61. v.set(userDrawMap.get(keys).toString());
  62. context.write(null, v);
  63. }
  64. }
  65. }
  66. // 创建画像数据
  67. private static UserDraw createDrawData(String[] dataArray, //
  68. String favourite, //兴趣爱好
  69. float male, float female, //性别
  70. float age1, float age2, float age3, float age4, float age5, //年龄
  71. long times) {
  72. UserDraw userDraw = new UserDraw();
  73. userDraw.setStatTimeDay(dataArray[0]);
  74. userDraw.setMDN(dataArray[1]);
  75. // 初始化
  76. userDraw.initAge(age1, age2, age3, age4, age5);
  77. userDraw.initSex(male, female);
  78. return userDraw;
  79. }
  80. }
  1. c.第三次MR作业,将数据写入到Hbase
  1. package userdrawputinhbase;
  2. import java.io.IOException;
  3. import org.apache.hadoop.io.LongWritable;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Mapper;
  6. public class UserDrawPutInHbaseMap extends Mapper<LongWritable, Text, Text, Text>{
  7. Text k2 = new Text();
  8. Text v2 = new Text();
  9. @Override
  10. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
  11. throws IOException, InterruptedException {
  12. String line = value.toString();
  13. String[] splited = line.split("\\|");
  14. k2.set(splited[1]);
  15. v2.set(line);
  16. context.write(k2, v2);
  17. }
  18. }
  1. --------------------------------------------------------------------
  1. package userdrawputinhbase;
  2. import java.io.IOException;
  3. import org.apache.hadoop.hbase.client.Durability;
  4. import org.apache.hadoop.hbase.client.Mutation;
  5. import org.apache.hadoop.hbase.client.Put;
  6. import org.apache.hadoop.hbase.mapreduce.TableReducer;
  7. import org.apache.hadoop.hbase.util.Bytes;
  8. import org.apache.hadoop.io.NullWritable;
  9. import org.apache.hadoop.io.Text;
  10. import org.apache.hadoop.mapreduce.Reducer;
  11. public class UserDrawPutInHbaseReduce extends TableReducer<Text, Text, NullWritable>{
  12. @SuppressWarnings("deprecation")
  13. @Override
  14. protected void reduce(Text k2, Iterable<Text> val,
  15. Reducer<Text, Text, NullWritable,Mutation>.Context context)
  16. throws IOException, InterruptedException {
  17. for (Text v2 : val) {
  18. String[] splited = v2.toString().split("\\|");
  19. //rowkey
  20. if(k2.toString().length()!=0){
  21. Put put = new Put(Bytes.toBytes(k2.toString()));
  22. //跳过写入Hlog,提高写入速度
  23. put.setDurability(Durability.SKIP_WAL);
  24. put.add(Bytes.toBytes("draw"), Bytes.toBytes("mdn"), Bytes.toBytes(splited[1]));
  25. put.add(Bytes.toBytes("draw"), Bytes.toBytes("male"), Bytes.toBytes(splited[2]));
  26. put.add(Bytes.toBytes("draw"), Bytes.toBytes("female"), Bytes.toBytes(splited[3]));
  27. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age1"), Bytes.toBytes(splited[4]));
  28. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age2"), Bytes.toBytes(splited[5]));
  29. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age3"), Bytes.toBytes(splited[6]));
  30. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age4"), Bytes.toBytes(splited[7]));
  31. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age5"), Bytes.toBytes(splited[8]));
  32. context.write(NullWritable.get(), put);
  33. }
  34. }
  35. }
  36. }
  1. 4.工具类
  2. a.Config.class
  1. package util;
  2. import java.io.IOException;
  3. import java.io.InputStream;
  4. import java.util.Properties;
  /**
   * Settings read from the classpath resource {@code UserDraw.properties}.
   *
   * <p>The resource is loaded once, when the class is initialized. If it is
   * missing or unreadable, initialization fails with an
   * {@link IllegalStateException} that names the resource, instead of an
   * anonymous {@code NullPointerException} or a silently empty configuration.
   * Every field holds the raw property string, or {@code null} when the
   * property is absent.
   */
  public class Config {
    private static final String RESOURCE = "UserDraw.properties";

    static Properties properties = loadProperties();

    /** Reads {@link #RESOURCE} through the context class loader and closes the stream. */
    private static Properties loadProperties() {
      Properties loaded = new Properties();
      try (InputStream inStream =
          Thread.currentThread().getContextClassLoader().getResourceAsStream(RESOURCE)) {
        if (inStream == null) {
          throw new IllegalStateException(RESOURCE + " not found on the classpath");
        }
        loaded.load(inStream);
      } catch (IOException e) {
        throw new IllegalStateException("Cannot read " + RESOURCE, e);
      }
      return loaded;
    }

    // UserDraw: field separator (used as a regex by String.split) and column positions
    public String Separator = properties.getProperty("Separator");
    public String Date = properties.getProperty("Date");
    public String MDN = properties.getProperty("MDN");
    public String appID = properties.getProperty("appID");
    public String count = properties.getProperty("count");
    public String ProcedureTime = properties.getProperty("ProcedureTime");
    // HBase: configuration keys and their values
    public String consite = properties.getProperty("consite");
    public String hbaseip = properties.getProperty("hbaseip");
    public String coftime = properties.getProperty("coftime");
    public String time = properties.getProperty("time");
    public String tableDraw = properties.getProperty("tableDraw");
  }
  1. b.LoadHdfsTable.class
  1. package util;
  2. import java.io.BufferedReader;
  3. import java.util.HashMap;
  4. import java.util.Map;
  5. public class LoadHdfsTable {
  6. public static String appTab = "/home/appTab.txt";
  7. private static Map<String, String[]> appMap = new HashMap<String, String[]>();
  8. static {
  9. try {
  10. StringBuffer sb = new StringBuffer();
  11. String line = null;
  12. BufferedReader breader = ReadHdfsFile.fileReader(appTab);
  13. while ((line = breader.readLine()) != null) {
  14. String[] appArray = line.split("\\|");
  15. // appName
  16. sb.append(appArray[1]).append(",");
  17. // 性别权重
  18. sb.append(appArray[2]).append(",").append(appArray[3]).append(",");
  19. // 年龄段权重
  20. sb.append(appArray[4]).append(",").append(appArray[5]).append(",").append(appArray[6]).append(",");
  21. sb.append(appArray[7]).append(",").append(appArray[8]);
  22. String[] appToValueArray = sb.toString().split(",");
  23. appMap.put(appArray[0], appToValueArray);
  24. sb.delete(0, sb.length());
  25. }
  26. } catch (Exception e) {
  27. e.printStackTrace();
  28. }
  29. }
  30. public static Map<String, String[]> getAppMap() {
  31. return appMap;
  32. }
  33. public static void setAppMap(Map<String, String[]> appMap) {
  34. LoadHdfsTable.appMap = appMap;
  35. }
  36. }
  1. c.ReadHdfsFile.class
  1. package util;
  2. import java.io.BufferedReader;
  3. import java.io.FileReader;
  4. import java.io.InputStreamReader;
  5. import org.apache.hadoop.conf.Configuration;
  6. import org.apache.hadoop.fs.FSDataInputStream;
  7. import org.apache.hadoop.fs.FileSystem;
  8. import org.apache.hadoop.fs.Path;
  /**
   * Opens the app tag table for reading.
   *
   * <p>The reader currently works on the local file system. The HDFS variant
   * is kept below as a commented-out reference.
   */
  public class ReadHdfsFile {
    /** Local development copy, used when the requested file does not exist. */
    private static final String LOCAL_FALLBACK = "D:\\share\\project\\userdraw\\appTab.txt";

    /**
     * Opens {@code fileName} if it names an existing local file; otherwise falls
     * back to the local development copy, which keeps the previous behavior for
     * existing callers (the argument used to be ignored entirely).
     *
     * @param fileName path of the table file; may be {@code null}
     * @return a buffered reader that the caller must close
     * @throws Exception if the selected file cannot be opened
     */
    public static BufferedReader fileReader(String fileName) throws Exception {
      // HDFS variant:
      // Configuration conf = new Configuration();
      // FileSystem fs = FileSystem.get(conf);
      // FSDataInputStream in = fs.open(new Path(fileName));
      // BufferedReader br = new BufferedReader(new InputStreamReader(in));
      boolean usable = fileName != null && new java.io.File(fileName).isFile();
      String path = usable ? fileName : LOCAL_FALLBACK;
      // NOTE(review): FileReader decodes with the platform default charset;
      // confirm the table's encoding if the app names matter.
      return new BufferedReader(new FileReader(path));
    }
  }
  1. d.TextArrayWritable类
  1. package util;
  2. import java.util.ArrayList;
  3. import org.apache.hadoop.io.ArrayWritable;
  4. import org.apache.hadoop.io.Text;
  5. public class TextArrayWritable extends ArrayWritable {
  6. public TextArrayWritable() {
  7. super(Text.class);
  8. }
  9. public TextArrayWritable(String[] strings) {
  10. super(Text.class);
  11. Text[] texts = new Text[strings.length];
  12. for (int i = 0; i < strings.length; i++) {
  13. texts[i] = new Text(strings[i]);
  14. }
  15. set(texts);
  16. }
  17. public TextArrayWritable(ArrayList<String> strings) {
  18. super(Text.class);
  19. Text[] texts = new Text[strings.size()];
  20. int i = 0;
  21. for (String str : strings) {
  22. texts[i] = new Text(str);
  23. i++;
  24. }
  25. set(texts);
  26. }
  27. public ArrayList<String> toArrayList(String[] writables) {
  28. ArrayList<String> arraylist = new ArrayList<String>();
  29. for (String writable : writables) {
  30. arraylist.add(writable.toString());
  31. }
  32. return arraylist;
  33. }
  34. public ArrayList<String> toArrayList() {
  35. return toArrayList(super.toStrings());
  36. }
  37. }
  1. 三、资源文件
  2. -------------------------------------
  3. 1.Resources
  4. [UserDraw.properties]
  5. #用户画像配置文件:
  6. ################################
  7. #字段分隔符号
  8. Separator=\\|
  9. #日期
  10. Date=11
  11. #手机号
  12. MDN=0
  13. #appID
  14. appID=15
  15. #计数
  16. count=1
  17. #使用时长
  18. ProcedureTime=12
  19. #Hbase数据库配置文件:
  20. consite=hbase.zookeeper.quorum
  21. hbaseip=192.168.0.4,192.168.0.5,192.168.0.6
  22. coftime=dfs.socket.timeout
  23. time=180000
  24. tableDraw=user_draw
  25. 2.pom.xml依赖
  26. <?xml version="1.0" encoding="UTF-8"?>
  27. <project xmlns="http://maven.apache.org/POM/4.0.0"
  28. xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  29. xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  30. <modelVersion>4.0.0</modelVersion>
  31. <groupId>groupId</groupId>
  32. <artifactId>UserDraw</artifactId>
  33. <version>1.0-SNAPSHOT</version>
  34. <dependencies>
  35. <dependency>
  36. <groupId>commons-codec</groupId>
  37. <artifactId>commons-codec</artifactId>
  38. <version>1.4</version>
  39. </dependency>
  40. <dependency>
  41. <groupId>org.apache.hbase</groupId>
  42. <artifactId>hbase-client</artifactId>
  43. <version>1.2.4</version>
  44. </dependency>
  45. <dependency>
  46. <groupId>org.apache.hbase</groupId>
  47. <artifactId>hbase-server</artifactId>
  48. <version>1.2.4</version>
  49. </dependency>
  50. </dependencies>
  51. </project>
  52. 四、其他
  53. ------------------------------------------------
  54. 1.创建hbase表:
  55. create 'user_draw',{NAME=>'draw',VERSIONS=>1,BLOCKCACHE=>true,BLOOMFILTER=>'ROW',COMPRESSION=>'SNAPPY'},
  56. {SPLITS => ['/','+','0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w',
  57. 'x','y','z','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']}
  58. 2.入库hbase代码:
  1. for (Text v2 : val) {
  2. String[] splited = v2.toString().split("\\|");
  3. //rowkey
  4. if(k2.toString().length()!=0){
  5. Put put = new Put(Bytes.toBytes(k2.toString()));
  6. //跳过写入Hlog,提高写入速度
  7. put.setDurability(Durability.SKIP_WAL);
  8. put.add(Bytes.toBytes("draw"), Bytes.toBytes("mdn"), Bytes.toBytes(splited[1]));
  9. put.add(Bytes.toBytes("draw"), Bytes.toBytes("male"), Bytes.toBytes(splited[2]));
  10. put.add(Bytes.toBytes("draw"), Bytes.toBytes("female"), Bytes.toBytes(splited[3]));
  11. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age1"), Bytes.toBytes(splited[4]));
  12. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age2"), Bytes.toBytes(splited[5]));
  13. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age3"), Bytes.toBytes(splited[6]));
  14. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age4"), Bytes.toBytes(splited[7]));
  15. put.add(Bytes.toBytes("draw"), Bytes.toBytes("age5"), Bytes.toBytes(splited[8]));
  16. context.write(NullWritable.get(), put);
  17. }
  18. }

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/很楠不爱3/article/detail/77752
推荐阅读
相关标签
  

闽ICP备14008679号