赞
踩
//新建库 并使用库 create database events; use events; //设置变量名为db 指向库名 events set hivevar:db=events; //设置允许所有的分区列都是动态分区列00000000000000000000 set hive.exec.dynamic.partition.mode=nonstrict; //设置允许动态分区功能 SET hive.optimize.sort.dynamic.partition=true; //关闭map阶段的表关联 SET hive.auto.convert.join=false; //hive 路径下查询各个文件夹存储情况 或者可以直接用 df -h 来查看磁盘使用情况 防止磁盘过满而引起建表不成功 [root@hadoop222 hive]# du -h -x --max-depth=1 1.6M ./examples 104K ./docs 24K ./conf 2.0M ./scripts 20M ./hcatalog 7.8M ./auxlib 113M ./lib 160K ./bin 32K ./data 0 ./warehouse 145M . //查询yarn节点的工作状态 [root@hadoop222 hive]# yarn node -list all 21/06/07 12:03:17 INFO client.RMProxy: Connecting to ResourceManager at hadoop222/192.168.146.222:8032 21/06/07 12:03:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Total Nodes:1 Node-Id Node-State Node-Http-Address Number-of-Running-Containers hadoop222:36360 RUNNING hadoop222:8042 0 //关联hbase里面的users表到hive (外部表) create external table ${db}.hb_users(user_id String, birth_year INT,gender String,locale String,location String,time_zone String,joined_at String) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=':key,profile:birthyear,profile:gender,region:locale,region:location,region:timezone,registration:joinedAt') tblproperties ('hbase.table.name'='events_db:users'); // 创建内部表 users 将hb_users外部数据 以orc的格式存储到hive内部表users中 // 提高了读取效率 create table ${db}.users stored as orc as select * from ${db}.hb_users; // 删除外部表 , 因为生成内部表后就不需要外部表了 drop table if exists ${db}.hb_users;
其他的表都是一样的重复操作
//继续创建 events表的映射 create external table ${db}.hb_events( event_id String, user_id String, event_time String, city String, state String, zip String, country String, lat Float, lng Float, common_words String ) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=' :key, creator:user_id, schedule:start_time, location:city, location:state, location:zip, location:country, location:lat, location:lng, remark:common_words ') tblproperties ('hbase.table.name'='events_db:events'); //根据events外部表生成内部表 create table ${db}.events stored as orc as select * from ${db}.hb_events; // 删除外部表 , 因为生成内部表后就不需要外部表了 drop table if exists ${db}.hb_events; //user_friend 外部表 create external table ${db}.hb_user_friend( row_key String, user_id String, friend_id String ) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=' :key, uf:userid, uf:friendid ') tblproperties ('hbase.table.name'='events_db:user_friend'); //user_friend 内部表 create table ${db}.user_friend stored as orc as select * from ${db}.hb_user_friend; // 删除外部表 , 因为生成内部表后就不需要外部表了 drop table if exists ${db}.hb_user_friend; //event_attendees 外部表 create external table ${db}.hb_event_attendee( row_key String, event_id String, user_id String, attend_type String ) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=' :key, euat:eventid, euat:friendid, euat:state ') tblproperties ('hbase.table.name'='events_db:event_attendee'); //event_attendees 内部表 create table ${db}.event_attendee stored as orc as select * from ${db}.hb_event_attendee; // 删除外部表 , 因为生成内部表后就不需要外部表了 drop table if exists ${db}.hb_event_attendee; //train 外部表 create external table ${db}.hb_train( row_key String, user_id String, event_id String, invited String, timestamp String, interested String ) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=' :key, eu:user, eu:event, eu:invited, eu:timestamp, eu:interested ') tblproperties ('hbase.table.name'='events_db:train'); //train 内部表 create table ${db}.train stored as orc as select * from ${db}.hb_train; // 删除外部表 , 因为生成内部表后就不需要外部表了 drop table if exists ${db}.hb_train;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。