赞
踩
Hive 的事务和锁,可以在会话级别设置。
当 hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager 并且 hive.support.concurrency=false 时,不会实例化一个 lock manager。是没有事务和锁的模式,调用获取锁、释放锁、开启事务、关闭事务等调用时直接返回。
当 hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager 并且 hive.support.concurrency=true 时, lock manager 根据 hive.lock.manager 的值创建对应的对象。是没有事务但是有锁的模式。开启事务、关闭事务调用时直接返回。但是获取锁和释放锁操作,转给 lock manager 对应的方法处理。
各云厂商,如阿里云,百度云默认都采用此方案。如果不使用此方案,用户需要手动改配置。
此方案读写表都没有限制。任务的执行时间可以估计,不会出现长时间等待锁的情况。
创建表
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk,mod(hash(ws_sold_time_sk),10)
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
创建表失败,只能用 DbTxnManager 的情况下使用事务表。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as orc tblproperties('transactional'='true');
创建表失败,只能用 DbTxnManager 的情况下使用事务表。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=false;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet tblproperties('transactional'='true','transactional_properties'='insert_only');
注意,此种方式设置以下参数,虽然没有事务管理器,但是 hive.support.concurrency=true;
,使用 hive.lock.manager
设置的锁管理器进行锁的操作。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
创建表
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行。
在 SQL 运行过程,另外打开 hive 窗口,执行 show locks 查看。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
show locks;
显示部分结果如下。
tpcds_hdfs_orc_3@t_web_sales EXCLUSIVE
tpcds_hdfs_orc_3@web_sales SHARED
tpcds_hdfs_orc_3@web_sales@ws_sold_date_sk=2450816 SHARED
tpcds_hdfs_orc_3@web_sales@ws_sold_date_sk=2450817 SHARED
tpcds_hdfs_orc_3@web_sales@ws_sold_date_sk=2450818 SHARED
注意,需要和正在执行的 SQL 设置为相同的事务管理器和锁管理器。
创建表
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行。
创建表
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk,mod(hash(ws_sold_time_sk),10)
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行。
创建表失败,只能用 DbTxnManager 的情况下使用事务表。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as orc tblproperties('transactional'='true');
创建表失败,只能用 DbTxnManager 的情况下使用事务表。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
set hive.support.concurrency=true;
set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet tblproperties('transactional'='true','transactional_properties'='insert_only');
DbTxnManager 是现有的方式配置,必须使用DbLockManager,其他锁的配置无效。并且 hive.support.concurrency
必须为 true
.
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行。
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert into table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务可以同时运行。
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230102')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
select count(distinct ws_sold_time_sk)
from t_web_sales where dt='20230102';
select current_timestamp();
可以看到两个任务不能同时运行。
创建表
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk,mod(hash(ws_sold_time_sk),10)
from web_sales;
select current_timestamp();
可以看到两个任务可以同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as orc tblproperties('transactional'='true');
事务表更严格,抛出的异常种类更多。需要在 metastore 开启 compactor,容易 compactor 和普通任务的相互竞争,导致报错。社区的 bug 很多,有 bug 导致内存溢出。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务不能同时运行,并且抛出以下异常:
FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.lockmgr.LockException(Transaction manager has aborted the transaction txnid:86949. Reason: Aborting [txnid:86949,86950] due to a write conflict on test/t_web_sales/dt=20230101 committed by [txnid:86947,86949] u/u)
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert into table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务可以同时运行
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230102')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
select count(distinct ws_sold_time_sk)
from t_web_sales where dt='20230102';
select current_timestamp();
可以看到两个任务不能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert into table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
select count(distinct ws_sold_time_sk)
from t_web_sales where dt='20230102';
select current_timestamp();
可以看到两个任务可以同时运行。
只能用 DbTxnManager 的情况下使用事务表。数据格式可以是其他格式。对相同分区可以多个会话使用 insert 但是不能使用 insert overwrite.
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
drop table if exists t_web_sales;
create table t_web_sales(
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_item_sk bigint) partitioned by (dt string) stored as parquet tblproperties('transactional'='true','transactional_properties'='insert_only');
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales limit 10000;
select current_timestamp();
可以看到两个任务不能同时运行,并且抛出以下异常:
Hive Internal Error: org.apache.hadoop.hive.ql.lockmgr.LockException(Transaction manager has aborted the transaction txnid:86999. Reason: Aborting [txnid:86999,87000] due to a write conflict on test/t_web_sales/dt=20230101 committed by [txnid:86997,86999] u/u)
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert into table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务可以同时运行
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230102')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
可以看到两个任务能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert overwrite table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
select count(distinct ws_sold_time_sk)
from t_web_sales where dt='20230102';
select current_timestamp();
可以看到两个任务不能同时运行。
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
insert into table t_web_sales partition(dt='20230101')
select ws_sold_time_sk,ws_ship_date_sk,ws_item_sk
from web_sales;
select current_timestamp();
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.support.concurrency=true;
select current_timestamp();
select count(distinct ws_sold_time_sk)
from t_web_sales where dt='20230102';
select current_timestamp();
可以看到两个任务可以同时运行。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。