Architecture diagram: (figure not reproduced here)
Flume: a distributed, reliable, highly available system for collecting, aggregating, and moving large volumes of log data. It supports pluggable senders in the logging system for collecting data, can apply simple processing to that data, and can write it to a variety of receivers (e.g., plain text, HDFS, HBase).
Kafka: a high-throughput distributed publish-subscribe messaging system that can handle all the activity-stream data of a consumer-scale website. This activity (page views, searches, and other user actions) is a key ingredient of many social features on the modern web. Because of the throughput required, this kind of data is usually handled through log processing and log aggregation.
Storm: a real-time stream-processing framework.
MySQL: the relational database, used as the bridge to the web tier (everything client-facing currently interacts through the database).
Hadoop: a distributed big-data framework.
Hive: a data warehouse built on Hadoop.
Spark: an in-memory distributed big-data framework.
Redis: a key-value database.
Maven resolves and loads the dependencies for us, so there is no need to import jars by hand. A sketch of the dependency list for this project follows below.
For setup, see this article: configuring the Maven environment in Eclipse.
Development is done in Eclipse.
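As a rough sketch, the pom.xml dependencies for this stack might look like the following. The version numbers are assumptions chosen to match the old backtype.storm and kafka.javaapi.producer APIs used below, not values from the original post; match them to your cluster.

<dependencies>
    <!-- Storm core (provides backtype.storm.*); "provided" because the cluster supplies it -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>0.9.6</version>
        <scope>provided</scope>
    </dependency>
    <!-- Kafka spout for Storm (storm.kafka.*) -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka</artifactId>
        <version>0.9.6</version>
    </dependency>
    <!-- Kafka 0.8 producer API (kafka.javaapi.producer.*) -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.10</artifactId>
        <version>0.8.2.2</version>
    </dependency>
    <!-- JDBC helpers and the MySQL driver -->
    <dependency>
        <groupId>commons-dbutils</groupId>
        <artifactId>commons-dbutils</artifactId>
        <version>1.6</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
</dependencies>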
The classes built for the phone-fraud detection project:
1. Database connection: JDBC
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.ArrayListHandler;

public final class MyDbUtils { // final: not meant to be subclassed
    private static String className = "com.mysql.jdbc.Driver";
    private static String url = "jdbc:mysql://192.168.115.130:3306/test?useUnicode=true&characterEncoding=utf-8";
    private static String user = "root";
    private static String password = "root";
    private static QueryRunner queryRunner = new QueryRunner();

    public static final String INSERT_LOG = "INSERT INTO LOG(topdomain,usetime,time) VALUES(?,?,?)";

    // Private constructor: this is a static utility class, never instantiated
    private MyDbUtils() {
    }

    static { // register the JDBC driver as soon as the class is loaded
        try {
            Class.forName(className);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // Run a query and collect the first column of every row
    public static List<String> executeQuerySql(String sql) {
        List<String> result = new ArrayList<String>();
        Connection connection = null;
        try {
            connection = getConnection();
            List<Object[]> rows = queryRunner.query(connection, sql, new ArrayListHandler());
            for (Object[] row : rows) {
                result.add(row[0].toString());
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(connection); // the original leaked this connection
        }
        return result;
    }

    public static void update(String sql, Object... params) {
        Connection connection = null;
        try {
            connection = getConnection();
            queryRunner.update(connection, sql, params);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(connection);
        }
    }

    // Open a new connection; a connection pool would be preferable in production
    public static Connection getConnection() throws SQLException {
        return DriverManager.getConnection(url, user, password);
    }

    private static void closeQuietly(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
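A minimal usage sketch of this class (the values are hypothetical; it assumes the LOG table from INSERT_LOG exists and that usetime and time are numeric columns):

import java.util.List;

public class MyDbUtilsUsage {
    public static void main(String[] args) {
        // query: collects the first column of each returned row
        List<String> topDomains = MyDbUtils.executeQuerySql("SELECT topdomain FROM LOG");
        System.out.println(topDomains);
        // parameterized insert via the prepared SQL constant
        MyDbUtils.update(MyDbUtils.INSERT_LOG, "example.com", 120, 1480000000);
    }
}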
2. Creating the Storm topology (comparable to creating a JobTracker job in Hadoop)
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.topology.TopologyBuilder;

public class CdrTopology {
    public static void main(String[] args) {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        String KAFKASPOUT = KafkaSpout.class.getSimpleName();
        String SPLIT_BOLT1 = SplitBolt1.class.getSimpleName();
        String SAVETOKAFKABOLT1 = SaveCallLogToKafkaBolt1.class.getSimpleName();
        String SAVETOMYSQL = SavaCallLogToMysql.class.getSimpleName();

        // Configure ZooKeeper
        BrokerHosts hosts = new ZkHosts("localhost:2181"); // the ZK ensemble Kafka uses
        String topic = "cdr_log"; // topic to consume
        String zkRoot = "/kafka"; // ZK node for the spout's offsets; created automatically if absent (in the ZK used by the Storm cluster)
        String id = "123"; // consumer group id
        SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
        topologyBuilder.setSpout(KAFKASPOUT, new KafkaSpout(spoutConf));
        //topologyBuilder.setSpout(KAFKASPOUT, new KafkaSpout(spoutConf), 3);
        topologyBuilder.setBolt(SPLIT_BOLT1, new SplitBolt1()).shuffleGrouping(KAFKASPOUT);
        // Subscribe to the "calllog" stream declared by SplitBolt1. (The original referenced
        // SPLIT_BOLT here, a component that was never added to the topology, which would
        // make topology submission fail with an InvalidTopologyException.)
        topologyBuilder.setBolt(SAVETOKAFKABOLT1, new SaveCallLogToKafkaBolt1()).shuffleGrouping(SPLIT_BOLT1, "calllog");
        //topologyBuilder.setBolt(SAVETOMYSQL, new SavaCallLogToMysql()).shuffleGrouping(SPLIT_BOLT1);

        StormTopology createTopology = topologyBuilder.createTopology();
        String simpleName = CdrTopology.class.getSimpleName();
        Config config = new Config();
        config.setStatsSampleRate(1D); // sample every tuple, for exact counts
        if (args.length == 0) {
            // no arguments: run in-process in a LocalCluster for testing
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology(simpleName, config, createTopology);
        } else {
            // any argument: submit to the real cluster (e.g. via the storm jar command)
            try {
                //config.setNumWorkers(45);
                config.setMaxSpoutPending(1000);
                StormSubmitter.submitTopology(simpleName, config, createTopology);
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        }
    }
}
3. Partitioning the messages (a custom Kafka producer partitioner):
import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

/**
 * Created by jason on 2016/11/27.
 */
public class PartitionerDemo implements Partitioner {

    private VerifiableProperties verifiableProperties;

    public PartitionerDemo(VerifiableProperties verifiableProperties) {
        this.verifiableProperties = verifiableProperties;
    }

    public int partition(Object key, int numPartitions) {
        String strKey = (String) key;
        // Partition by the key's hashCode; mask off the sign bit so a negative
        // hashCode cannot yield a negative (invalid) partition number
        return (strKey.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
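To show how this partitioner is wired in, here is a hedged standalone-producer sketch (the class name PartitionerDemoUsage and the sample key are hypothetical; the broker address, topic, and property names are taken from the code later in this post). With the old Kafka 0.8 producer API, the key supplied in the KeyedMessage is exactly what partition() receives:

import java.util.Properties;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class PartitionerDemoUsage {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("metadata.broker.list", "192.168.115.130:9092");
        props.setProperty("serializer.class", "kafka.serializer.StringEncoder");
        // route messages through the custom partitioner above
        props.setProperty("partitioner.class", "cn.com.cintel.storm_siyuan.PartitionerDemo");
        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        // the key ("13800000000") is what PartitionerDemo.partition() hashes
        producer.send(new KeyedMessage<String, String>("cdr_log", "13800000000", "some call log line"));
        producer.close();
    }
}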
4. Classes for writing to the individual database tables (plus a Kafka sink):
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;

import cn.com.cintel.storm_siyuan.utils.MyDbUtils;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class SavaCallLogToMysql extends BaseRichBolt {
    private OutputCollector collector;
    private Connection connection;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.connection = null; // note: a connection pool is recommended here
    }

    @Override
    public void execute(Tuple tuple) {
        // A PreparedStatement replaces the original string-concatenated SQL,
        // which was vulnerable to SQL injection and quoting errors
        String sql = "INSERT INTO calllog(time,callingnumber,callednumber,callingarea,calledarea,is_inland,domain) VALUES(?,?,?,?,?,?,?)";
        try {
            connection = MyDbUtils.getConnection();
            PreparedStatement stmt = connection.prepareStatement(sql);
            stmt.setInt(1, tuple.getIntegerByField("time"));
            stmt.setString(2, tuple.getStringByField("callingnumber"));
            stmt.setString(3, tuple.getStringByField("callednumber"));
            stmt.setString(4, tuple.getStringByField("callingarea"));
            stmt.setString(5, tuple.getStringByField("calledarea"));
            stmt.setInt(6, tuple.getIntegerByField("is_land"));
            stmt.setInt(7, tuple.getIntegerByField("domain"));
            stmt.executeUpdate();
            stmt.close();
            collector.ack(tuple);
        } catch (SQLException e) {
            e.printStackTrace();
            collector.fail(tuple);
        } finally {
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: emits nothing
    }
}
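For reference, a possible shape of the calllog table, inferred from the INSERT statement above; the column types and lengths are assumptions, not taken from the original post:

CREATE TABLE calllog (
    time          INT,          -- numeric time value as stored by the bolt (assumed INT)
    callingnumber VARCHAR(32),
    callednumber  VARCHAR(32),
    callingarea   VARCHAR(64),
    calledarea    VARCHAR(64),
    is_inland     INT,          -- populated from the tuple's is_land field
    domain        INT
);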
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.Random;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class SaveCallLogToKafkaBolt extends BaseRichBolt {
    private String topic;
    private Properties prop;
    private Producer<String, String> producer;
    private Random random;
    private int time;
    private String callingnumber;
    private String callednumber;
    private String callingarea;
    private String calledarea;
    int is_land;
    int domain;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        System.out.println("SaveCallLogToKafkaBolt start");
        topic = "call_log";
        prop = new Properties();
        try {
            // load the producer configuration from the classpath
            prop.load(SaveCallLogToKafkaBolt.class.getClassLoader().getResourceAsStream("producer.properties"));
        } catch (IOException e) {
            e.printStackTrace();
        }
        /*prop.setProperty("metadata.broker.list", "192.168.115.130:9092,192.168.115.132:9092,192.168.115.133:9092");
        prop.setProperty("partitioner.class", "cn.com.cintel.storm_siyuan.PartitionerDemo");
        prop.setProperty("producer.type", "sync");
        prop.setProperty("compression.codec", "none");
        prop.setProperty("serializer.class", "kafka.serializer.StringEncoder");*/
        producer = new Producer<>(new ProducerConfig(prop));
        random = new Random();
    }

    @Override
    public void execute(Tuple tuple) {
        try {
            StringBuffer keyedMessage = getKeyedMessage(tuple);
            String msg = keyedMessage.toString();
            // a random key in [0,3) spreads messages across three partitions
            Integer tt = random.nextInt(3);
            System.out.println("SaveCallLogToKafkaBolt msg:" + msg);
            producer.send(new KeyedMessage<String, String>(topic, tt.toString(), msg));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Rebuild the pipe-delimited call-log line from the tuple's fields
    private StringBuffer getKeyedMessage(Tuple tuple) {
        time = tuple.getIntegerByField("time");
        callingnumber = tuple.getStringByField("callingnumber");
        callednumber = tuple.getStringByField("callednumber");
        callingarea = tuple.getStringByField("callingarea");
        calledarea = tuple.getStringByField("calledarea");
        is_land = tuple.getIntegerByField("is_land");
        domain = tuple.getIntegerByField("domain");
        StringBuffer msg = new StringBuffer();
        msg.append(time);
        msg.append("|" + callingnumber);
        msg.append("|" + callednumber);
        msg.append("|" + callingarea);
        msg.append("|" + calledarea);
        msg.append("|" + is_land);
        msg.append("|" + domain);
        msg.append("|" + "55");
        msg.append("|" + "66");
        msg.append("|" + ";");
        return msg;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: declares no output streams
    }
}
5. Storm bolt processing (a bolt is the abstraction that processes tuples)
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class SplitBolt1 extends BaseRichBolt {
    private OutputCollector collector;
    String log;
    String[] splited;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        // expected layout: time|callingnumber|callednumber|callingarea|calledarea|is_land|domain|...
        try {
            log = new String(input.getBinaryByField("bytes"));
            splited = log.split("\\|", -1);
            Message msg = new Message();
            msg.setTime(Integer.parseInt(splited[0]));
            msg.setCallingnumber(splited[1]);
            msg.setCallednumber(splited[2]);
            msg.setCallingarea(splited[3]);
            msg.setCalledarea(splited[4]);
            msg.setIs_land(Integer.parseInt(splited[5]));
            msg.setDomain(Integer.parseInt(splited[6]));
            this.collector.emit("calllog", new Values(msg));
            this.collector.ack(input);
        } catch (Exception e) {
            // malformed records (bad numbers or too few fields) fail the tuple
            // so the spout can replay it; catching only NumberFormatException,
            // as the original did, would let a short record kill the worker
            this.collector.fail(input);
            e.printStackTrace();
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("calllog", new Fields("calllog"));
    }
}
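The Message class is not shown in the original post; here is a minimal sketch consistent with the setters called above. The field names are inferred from those calls, and it implements Serializable so Storm can ship it between workers:

import java.io.Serializable;

public class Message implements Serializable {
    private int time;
    private String callingnumber;
    private String callednumber;
    private String callingarea;
    private String calledarea;
    private int is_land;
    private int domain;

    public int getTime() { return time; }
    public void setTime(int time) { this.time = time; }
    public String getCallingnumber() { return callingnumber; }
    public void setCallingnumber(String callingnumber) { this.callingnumber = callingnumber; }
    public String getCallednumber() { return callednumber; }
    public void setCallednumber(String callednumber) { this.callednumber = callednumber; }
    public String getCallingarea() { return callingarea; }
    public void setCallingarea(String callingarea) { this.callingarea = callingarea; }
    public String getCalledarea() { return calledarea; }
    public void setCalledarea(String calledarea) { this.calledarea = calledarea; }
    public int getIs_land() { return is_land; }
    public void setIs_land(int is_land) { this.is_land = is_land; }
    public int getDomain() { return domain; }
    public void setDomain(int domain) { this.domain = domain; }
}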
The Spark and Hive pieces will be covered in the next post.