当前位置:   article > 正文

hbase热点问题解决(预分区)

hbase热点问题

HBase 海量存储案例 预分区表

1.1 预分区表

默认创建表的方式,则HBase顺序写入可能会受到RegionServer热点的影响。对行键进行加盐可以解决热点问题。在HBase中,可以使用两种方式:

1.ROWKEY预分区
2.加盐指定数量分区

1.1.1 ROWKEY预分区

按照用户ID来分区,一共4个分区。并指定数据的压缩格式为GZ

drop table if exists ORDER_DTL;

create table if not exists ORDER_DTL(

    "id" varchar primary key,

    C1."status" varchar,

    C1."money" float,

    C1."pay_way" integer,

    C1."user_id" varchar,

    C1."operation_time" varchar,

    C1."category" varchar

)

CONPRESSION='GZ'

SPLIT ON ('3','5','7');

hbase shell 样例

create 'mytable', {NAME=>'cf', VERSIONS=>1, BLOOMFILTER=>'ROW', COMPRESSION=>'SNAPPY', DATA_BLOCK_ENCODING=>'FAST_DIFF', TTL => '604800'}, SPLITS => ['10', '20', '30']


我们尝试往表中插入一些数据,然后去HBase中查看数据的分布情况。

UPSERT INTO "ORDER_DTL" VALUES('02602f66-adc7-40d4-8485-76b5632b5b53','已提交',4070,1,'4944191','2020-04-25 12:09:16','手机;');

UPSERT INTO "ORDER_DTL" VALUES('0968a418-f2bc-49b4-b9a9-2157cf214cfd','已完成',4350,1,'1625615','2020-04-25 12:09:37','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('0e01edba-5e55-425e-837a-7efb91c56630','已提交',6370,3,'3919700','2020-04-25 12:09:39','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('0e01edba-5e55-425e-837a-7efb91c56630','已付款',6370,3,'3919700','2020-04-25 12:09:44','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('0f46d542-34cb-4ef4-b7fe-6dcfa5f14751','已提交',9380,1,'2993700','2020-04-25 12:09:41','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('0f46d542-34cb-4ef4-b7fe-6dcfa5f14751','已付款',9380,1,'2993700','2020-04-25 12:09:46','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('1fb7c50f-9e26-4aa8-a140-a03d0de78729','已完成',6400,2,'5037058','2020-04-25 12:10:13','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('23275016-996b-420c-8edc-3e3b41de1aee','已付款',280,1,'3018827','2020-04-25 12:09:53','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('2375a7cf-c206-4ac0-8de4-863e7ffae27b','已完成',5600,1,'6489579','2020-04-25 12:08:55','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('2375a7cf-c206-4ac0-8de4-863e7ffae27b','已付款',5600,1,'6489579','2020-04-25 12:09:00','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('269fe10c-740b-4fdb-ad25-7939094073de','已提交',8340,2,'2948003','2020-04-25 12:09:26','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('269fe10c-740b-4fdb-ad25-7939094073de','已付款',8340,2,'2948003','2020-04-25 12:09:30','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('2849fa34-6513-44d6-8f66-97bccb3a31a1','已提交',7060,2,'2092774','2020-04-25 12:09:38','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('28b7e793-6d14-455b-91b3-0bd8b23b610c','已提交',640,3,'7152356','2020-04-25 12:09:49','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('28b7e793-6d14-455b-91b3-0bd8b23b610c','已付款',9410,3,'7152356','2020-04-25 12:10:01','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('2909b28a-5085-4f1d-b01e-a34fbaf6ce37','已提交',9390,3,'8237476','2020-04-25 12:10:08','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('2a01dfe5-f5dc-4140-b31b-a6ee27a6e51e','已提交',7490,2,'7813118','2020-04-25 12:09:05','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('2a01dfe5-f5dc-4140-b31b-a6ee27a6e51e','已付款',7490,2,'7813118','2020-04-25 12:09:06','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('2b86ab90-3180-4940-b624-c936a1e7568d','已付款',5360,2,'5301038','2020-04-25 12:08:50','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('2b86ab90-3180-4940-b624-c936a1e7568d','已提交',5360,2,'5301038','2020-04-25 12:08:53','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('2b86ab90-3180-4940-b624-c936a1e7568d','已取消',5360,2,'5301038','2020-04-25 12:08:58','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('2e19fbe8-7970-4d62-8e8f-d364afc2dd41','已付款',6490,0,'3141181','2020-04-25 12:09:22','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('2fc28d36-dca0-49e8-bad0-42d0602bdb40','已付款',3820,1,'9054826','2020-04-25 12:10:04','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('31477850-8b15-4f1b-9ec3-939f7dc47241','已提交',4650,2,'5837271','2020-04-25 12:08:52','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('31477850-8b15-4f1b-9ec3-939f7dc47241','已付款',4650,2,'5837271','2020-04-25 12:08:57','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('39319322-2d80-41e7-a862-8b8858e63316','已提交',5000,1,'5686435','2020-04-25 12:08:51','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('39319322-2d80-41e7-a862-8b8858e63316','已完成',5000,1,'5686435','2020-04-25 12:08:56','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('3d2254bd-c25a-404f-8e42-2faa4929a629','已提交',5000,3,'1274270','2020-04-25 12:08:41','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('3d2254bd-c25a-404f-8e42-2faa4929a629','已付款',5000,3,'1274270','2020-04-25 12:08:42','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('3d2254bd-c25a-404f-8e42-2faa4929a629','已完成',5000,1,'1274270','2020-04-25 12:08:43','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('42f7fe21-55a3-416f-9535-baa222cc0098','已完成',3600,2,'2661641','2020-04-25 12:09:58','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('44231dbb-9e58-4f1a-8c83-be1aa814be83','已提交',3950,1,'3855371','2020-04-25 12:08:39','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('44231dbb-9e58-4f1a-8c83-be1aa814be83','已付款',3950,1,'3855371','2020-04-25 12:08:40','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('526e33d2-a095-4e19-b759-0017b13666ca','已完成',3280,0,'5553283','2020-04-25 12:09:01','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('5a6932f4-b4a4-4a1a-b082-2475d13f9240','已提交',50,2,'1764961','2020-04-25 12:10:07','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('5fc0093c-59a3-417b-a9ff-104b9789b530','已提交',6310,2,'1292805','2020-04-25 12:09:36','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('605c6dd8-123b-4088-a047-e9f377fcd866','已完成',8980,2,'6202324','2020-04-25 12:09:54','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('613cfd50-55c7-44d2-bb67-995f72c488ea','已完成',6830,3,'6977236','2020-04-25 12:10:06','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('62246ac1-3dcb-4f2c-8943-800c9216c29f','已提交',8610,1,'5264116','2020-04-25 12:09:14','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('62246ac1-3dcb-4f2c-8943-800c9216c29f','已付款',8610,1,'5264116','2020-04-25 12:09:18','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('625c7fef-de87-428a-b581-a63c71059b14','已提交',5970,0,'8051757','2020-04-25 12:09:07','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('625c7fef-de87-428a-b581-a63c71059b14','已付款',5970,0,'8051757','2020-04-25 12:09:19','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('6d43c490-58ab-4e23-b399-dda862e06481','已提交',4570,0,'5514248','2020-04-25 12:09:34','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('70fa0ae0-6c02-4cfa-91a9-6ad929fe6b1b','已付款',4100,1,'8598963','2020-04-25 12:09:08','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('7170ce71-1fc0-4b6e-a339-67f525536dcd','已完成',9740,1,'4816392','2020-04-25 12:09:51','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('7170ce71-1fc0-4b6e-a339-67f525536dcd','已提交',9740,1,'4816392','2020-04-25 12:10:03','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('71961b06-290b-457d-bbe0-86acb013b0e3','已付款',6550,3,'2393699','2020-04-25 12:08:47','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('71961b06-290b-457d-bbe0-86acb013b0e3','已付款',6550,3,'2393699','2020-04-25 12:08:48','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('71961b06-290b-457d-bbe0-86acb013b0e3','已完成',6550,3,'2393699','2020-04-25 12:08:49','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('72dc148e-ce64-432d-b99f-61c389cb82cd','已提交',4090,1,'2536942','2020-04-25 12:10:12','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('72dc148e-ce64-432d-b99f-61c389cb82cd','已付款',4090,1,'2536942','2020-04-25 12:10:14','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('7c0c1668-b783-413f-afc4-678a5a6d1033','已完成',3850,3,'6803936','2020-04-25 12:09:20','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('7fa02f7a-10df-4247-9935-94c8b7d4dbc0','已提交',1060,0,'6119810','2020-04-25 12:09:21','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('820c5e83-f2e0-42d4-b5f0-83802c75addc','已付款',9270,2,'5818454','2020-04-25 12:10:09','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('83ed55ec-a439-44e0-8fe0-acb7703fb691','已完成',8380,2,'6804703','2020-04-25 12:09:52','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('85287268-f139-4d59-8087-23fa6454de9d','已提交',9750,1,'4382852','2020-04-25 12:09:43','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('85287268-f139-4d59-8087-23fa6454de9d','已付款',9750,1,'4382852','2020-04-25 12:09:48','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('85287268-f139-4d59-8087-23fa6454de9d','已取消',9750,1,'4382852','2020-04-25 12:10:00','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('8d32669e-327a-4802-89f4-2e91303aee59','已提交',9390,1,'4182962','2020-04-25 12:09:57','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('8dadc2e4-63f1-490f-9182-793be64fed76','已付款',9350,1,'5937549','2020-04-25 12:09:02','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('94ad8ee0-8898-442c-8cb1-083a4b609616','已提交',4370,0,'4666456','2020-04-25 12:09:13','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('994cbb44-f0ee-45ff-a4f4-76c87bc2b972','已付款',3190,3,'3200759','2020-04-25 12:09:25','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('9bf92519-6eb3-449a-853b-0e19f6005887','已提交',1100,0,'3457528','2020-04-25 12:10:11','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('9ff3032c-8679-4247-9e6f-4caf2dc93aff','已提交',850,0,'8835231','2020-04-25 12:09:40','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('9ff3032c-8679-4247-9e6f-4caf2dc93aff','已付款',850,0,'8835231','2020-04-25 12:09:45','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('a467ba42-f91e-48a0-865e-1703aaa45e0e','已提交',8040,0,'8206022','2020-04-25 12:09:50','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('a467ba42-f91e-48a0-865e-1703aaa45e0e','已付款',8040,0,'8206022','2020-04-25 12:10:02','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('a5302f47-96d9-41b4-a14c-c7a508f59282','已付款',8570,2,'5319315','2020-04-25 12:08:44','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('a5b57bec-6235-45f4-bd7e-6deb5cd1e008','已提交',5700,3,'6486444','2020-04-25 12:09:27','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('a5b57bec-6235-45f4-bd7e-6deb5cd1e008','已付款',5700,3,'6486444','2020-04-25 12:09:31','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('ae5c3363-cf8f-48a9-9676-701a7b0a7ca5','已付款',7460,1,'2379296','2020-04-25 12:09:23','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('b1fb2399-7cf2-4af5-960a-a4d77f4803b8','已提交',2690,3,'6686018','2020-04-25 12:09:55','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('b21c7dbd-dabd-4610-94b9-d7039866a8eb','已提交',6310,2,'1552851','2020-04-25 12:09:15','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('b4bfd4b7-51f5-480e-9e23-8b1579e36248','已提交',4000,1,'3260372','2020-04-25 12:09:35','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('b63983cc-2b59-4992-84c6-9810526d0282','已提交',7370,3,'3107867','2020-04-25 12:08:45','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('b63983cc-2b59-4992-84c6-9810526d0282','已付款',7370,3,'3107867','2020-04-25 12:08:46','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('bf60b752-1ccc-43bf-9bc3-b2aeccacc0ed','已提交',720,2,'5034117','2020-04-25 12:09:03','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('c808addc-8b8b-4d89-99b1-db2ed52e61b4','已提交',3630,1,'6435854','2020-04-25 12:09:10','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('cc9dbd20-cf9f-4097-ae8b-4e73db1e4ba1','已付款',5000,0,'2007322','2020-04-25 12:08:38','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('ccceaf57-a5ab-44df-834a-e7b32c63efc1','已提交',2660,2,'7928516','2020-04-25 12:09:42','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('ccceaf57-a5ab-44df-834a-e7b32c63efc1','已付款',2660,2,'7928516','2020-04-25 12:09:47','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('ccceaf57-a5ab-44df-834a-e7b32c63efc1','已完成',2660,2,'7928516','2020-04-25 12:09:59','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('d7be5c39-e07c-40e8-bf09-4922fbc6335c','已付款',8750,2,'1250995','2020-04-25 12:09:09','食品;家用电器;');

UPSERT INTO "ORDER_DTL" VALUES('dfe16df7-4a46-4b6f-9c6d-083ec215218e','已完成',410,0,'1923817','2020-04-25 12:09:56','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('e1241ad4-c9c1-4c17-93b9-ef2c26e7f2b2','已付款',6760,0,'2457464','2020-04-25 12:08:54','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('e1241ad4-c9c1-4c17-93b9-ef2c26e7f2b2','已提交',6760,0,'2457464','2020-04-25 12:08:59','数码;女装;');

UPSERT INTO "ORDER_DTL" VALUES('e180a9f2-9f80-4b6d-99c8-452d6c037fc7','已付款',8120,2,'7645270','2020-04-25 12:09:28','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('e180a9f2-9f80-4b6d-99c8-452d6c037fc7','已完成',8120,2,'7645270','2020-04-25 12:09:32','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('e4418843-9ac0-47a7-bfd8-d61c4d296933','已付款',8170,2,'7695668','2020-04-25 12:09:11','家用电器;;电脑;');

UPSERT INTO "ORDER_DTL" VALUES('e8b3bb37-1019-4492-93c7-305177271a71','已完成',2560,2,'4405460','2020-04-25 12:10:05','男装;男鞋;');

UPSERT INTO "ORDER_DTL" VALUES('eb1a1a22-953a-42f1-b594-f5dfc8fb6262','已完成',2370,2,'8233485','2020-04-25 12:09:24','机票;文娱;');

UPSERT INTO "ORDER_DTL" VALUES('ecfd18f5-45f2-4dcd-9c47-f2ad9b216bd0','已付款',8070,3,'6387107','2020-04-25 12:09:04','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('ecfd18f5-45f2-4dcd-9c47-f2ad9b216bd0','已完成',8070,3,'6387107','2020-04-25 12:09:17','酒店;旅游;');

UPSERT INTO "ORDER_DTL" VALUES('f1226752-7be3-4702-a496-3ddba56f66ec','已付款',4410,3,'1981968','2020-04-25 12:10:10','维修;手机;');

UPSERT INTO "ORDER_DTL" VALUES('f642b16b-eade-4169-9eeb-4d5f294ec594','已提交',4010,1,'6463215','2020-04-25 12:09:29','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('f642b16b-eade-4169-9eeb-4d5f294ec594','已付款',4010,1,'6463215','2020-04-25 12:09:33','男鞋;汽车;');

UPSERT INTO "ORDER_DTL" VALUES('f8f3ca6f-2f5c-44fd-9755-1792de183845','已付款',5950,3,'4060214','2020-04-25 12:09:12','机票;文娱;');

我们发现数据分布在每一个Region中。

1.1.2 加盐指定数量分区

drop table if exists ORDER_DTL;

create table if not exists ORDER_DTL(

    "id" varchar primary key,

    C1."status" varchar,

    C1."money" float,

    C1."pay_way" integer,

    C1."user_id" varchar,

    C1."operation_time" varchar,

    C1."category" varchar

)

CONPRESSION='GZ', SALT_BUCKETS=10;

我们在HBaseWeb UI中可以查看到生成了10个Region

插入数据后,发现数据分部在每一个Region中。

查看HBase中的表,我们发现Phoenix在每个ID前,都添加了一个Hash值,用来将分布分布到不同的Region中。

hbase(main):018:0> scan "ORDER_DTL", {LIMIT => 1}

ROW                                                          COLUMN+CELL                                                                                                                                                                    

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x00\x00\x00\x00, timestamp=1589268724801, value=x                                                                                                                   

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x0B, timestamp=1589268724801, value=\xE5\xB7\xB2\xE4\xBB\x98\xE6\xAC\xBE                                                                                        

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x0C, timestamp=1589268724801, value=\xC6\x12\x90\x01                                                                                                            

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x0D, timestamp=1589268724801, value=\x80\x00\x00\x01                                                                                                             

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x0E, timestamp=1589268724801, value=2993700                                                                                                                      

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x0F, timestamp=1589268724801, value=2020-04-25 12:09:46                                                                                                          

 \x000f46d542-34cb-4ef4-b7fe-6dcfa5f14751                    column=C1:\x80\x10, timestamp=1589268724801, value=\xE7\xBB\xB4\xE4\xBF\xAE;\xE6\x89\x8B\xE6\x9C\xBA;                                                                           

1 row(s)

注意:CONPRESSIONSALT_BUCKETS之间需要使用逗号分隔,否则会出现语法错误

2.1 hbase热点问题解决(预分区)

一、出现热点问题原因

       1hbase的中的数据是按照字典序排序的,当大量连续的rowkey集中写在个别的region,各个region之间数据分布不均衡;

       2、创建表时没有提前预分区,创建的表默认只有一个region,大量的数据写入当前region

       3、创建表已经提前预分区,但是设计的rowkey没有规律可循,设计的rowkey应该由regionNo+messageId组成。

二、如何解决热点问题       

       解决这个问题,关键是要设计出可以让数据分布均匀的rowkey,与关系型数据库一样,rowkey是用来检索记录的主键。访问hbase table中的行,rowkey 可以是任意字符串(最大长度 64KB,实际应用中长度一般为 10-100bytes),在hbase内部,rowkey保存为字节数组,存储时,数据按照rowkey的字典序排序存储。

创建表命令:

create 'testTable333', {NAME => 'cf', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'snappy', MIN_VERSIONS => '0', TTL => '15552000', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true', METADATA => {'ENCODE_ON_DISK' => 'true'}}, {SPLITS => ['0001|','0002|','0003|','0004|','0005|','0006|','0007|','0008|','0009|','00010|']}

我这里预分10region,执行命令之后,在hbaseconsole中可以看到以下信息,说明预分区ok了!!!

 1、第一种设计rowkey方式:随机数+messageId,如果想让最近的数据快速get到,可以将时间戳加上,我这里的region0001|0009|开头的,因为hbase的数据是字典序排序的,所以如果我生成的 rowkey=0002rer4343343422,则当前这条数据就会保存到0001|~0002|这个region里,因为我的messageId都是字母+数字,“|”ASCII值大于字母、数字。

生成regionNo的工具类:RegionUtils

package com.cn.dl;

import java.util.Random;

/**

 * Created by Tiger on 2018/4/18.

 */

public class RegionUtils {

    //十个预分区

    private static final int REGION_NUM = 10;

    //存放regionNo0001,0002,...0009,0010

    private static final String[] REGION_ARRAY = new String[REGION_NUM];

    static {

        initRegionArray();

    }

    /**

     * 生成regionNo

     * */

    private static void initRegionArray(){

        for(int i=1; i<=REGION_NUM; i++){

            String regionNo = String.valueOf(i);

            while (regionNo.length() < 4){

                regionNo = "0" + regionNo;

            }

            REGION_ARRAY[i-1] = regionNo;

        }

    }

    /**

     * 随机获取regionNo

     * @return  regionNo

     * */

    public static String getRegionNo(){

        Random random = new Random();

        return REGION_ARRAY[random.nextInt(10)];

    }

    public static void main(String[] args) {

        int i= 0;

        while (i < 100){

            System.out.println(getRegionNo());

            i++;

        }

    }

}

——  生成rowKey

// TODO: 2018/12/18 只是一个生成rowKey的案例

    public void execute(Tuple tuple) {

        try {

            JSONObject json = JSONObject.parseObject(tuple.getStringByField("messageSpout"));

            String messageId = json.getString("messageId");

            // TODO: 2018/12/18 生成rowKey:regionNo+时间戳+messageId ,加上时间戳在hbase中可以提高查询效率

            String rowKey = RegionUtils.getRegionNo() + System.currentTimeMillis() + messageId;

            json.put("rowKey",rowKey);

            System.out.println(json.toJSONString());

        }catch (Exception e){

            e.printStackTrace();

        }finally {

            collector.ack(tuple);

        }

}

 打印结果rowkey=regionNo+时间戳+messageId,前缀是随机的

{"name":"name2","messageId":"b998a8dfc05a4a819284213d4e727a85","age":12,"rowKey":"00041545105001972b998a8dfc05a4a819284213d4e727a85"}

{"name":"name3","messageId":"799affbf346641e8a00bfeee78ffcdb4","age":13,"rowKey":"00031545105002973799affbf346641e8a00bfeee78ffcdb4"}

{"name":"name4","messageId":"bbdf16b9a12b4fa09b060402f9522fed","age":14,"rowKey":"00051545105003973bbdf16b9a12b4fa09b060402f9522fed"}

{"name":"name5","messageId":"03c119868cd742459464df53c3827147","age":15,"rowKey":"0009154510500497403c119868cd742459464df53c3827147"}

{"name":"name6","messageId":"84c682681cdc4ac09ad3d270741074d3","age":16,"rowKey":"0002154510500597484c682681cdc4ac09ad3d270741074d3"}

{"name":"name7","messageId":"aecbd65f3f434452ab4a924d8e42b947","age":17,"rowKey":"00091545105006976aecbd65f3f434452ab4a924d8e42b947"}

{"name":"name8","messageId":"3bcb23e414e5450898b6b0eefff4d80a","age":18,"rowKey":"000315451050079783bcb23e414e5450898b6b0eefff4d80a"}

{"name":"name9","messageId":"40be62bfcea24ea799ae6f191241c5e8","age":19,"rowKey":"0002154510500897840be62bfcea24ea799ae6f191241c5e8"}

{"name":"name10","messageId":"94c220cd10d141c89cf08e61d0a48e7f","age":20,"rowKey":"0007154510500997894c220cd10d141c89cf08e61d0a48e7f"}

{"name":"name11","messageId":"0796f735b1ba43beb7b15d63c4fd4ec8","age":21,"rowKey":"000515451050109780796f735b1ba43beb7b15d63c4fd4ec8"}

{"name":"name12","messageId":"05ac8417e52443f48bf2c56879b3e2c6","age":22,"rowKey":"0009154510501197805ac8417e52443f48bf2c56879b3e2c6"}

{"name":"name13","messageId":"b87484b633b747ba8320cdf69334459b","age":23,"rowKey":"00101545105012978b87484b633b747ba8320cdf69334459b"}

{"name":"name14","messageId":"84c6daf1cdfd4c0f977a8742ee528977","age":24,"rowKey":"0005154510501397984c6daf1cdfd4c0f977a8742ee528977"}

{"name":"name15","messageId":"8e01e5c53d024de18507ed2a98e38519","age":25,"rowKey":"000415451050149808e01e5c53d024de18507ed2a98e38519"}

{"name":"name16","messageId":"48939394581946e881b91e797659d6ca","age":26,"rowKey":"0005154510501598248939394581946e881b91e797659d6ca"}

{"name":"name17","messageId":"b5c2024c721642a5a2b4cc8682c7cd40","age":27,"rowKey":"00021545105016981b5c2024c721642a5a2b4cc8682c7cd40"}

{"name":"name18","messageId":"4efb10fc351947f6a78761e7b3bf1783","age":28,"rowKey":"000215451050179824efb10fc351947f6a78761e7b3bf1783"}

{"name":"name19","messageId":"a4a27bafd5f749d0b08d9eb832120737","age":29,"rowKey":"00041545105018983a4a27bafd5f749d0b08d9eb832120737"}

{"name":"name20","messageId":"1d90758cbcc2495197b2ef0a05e58610","age":30,"rowKey":"000815451050199831d90758cbcc2495197b2ef0a05e58610"}

{"name":"name21","messageId":"47ae3fb0e3914496b75445df92d0a133","age":31,"rowKey":"0002154510502098347ae3fb0e3914496b75445df92d0a133"}

{"name":"name22","messageId":"419dccbeb2b74484997bd5373f8347af","age":32,"rowKey":"00071545105021982419dccbeb2b74484997bd5373f8347af"}

{"name":"name23","messageId":"51e38da4c9c74542bcd38be704ff3fec","age":33,"rowKey":"0005154510502298351e38da4c9c74542bcd38be704ff3fec"}

{"name":"name24","messageId":"e99e783220d14f63be1f967f82bd69fb","age":34,"rowKey":"00081545105023983e99e783220d14f63be1f967f82bd69fb"}

{"name":"name25","messageId":"0e15f181464146598dfe44976676c706","age":35,"rowKey":"000915451050249840e15f181464146598dfe44976676c706"}

{"name":"name26","messageId":"d0d8c0e939b44a688915714b85d61b1f","age":36,"rowKey":"00101545105025985d0d8c0e939b44a688915714b85d61b1f"}

{"name":"name27","messageId":"4a7d3404871a4137b9db4e24209f1346","age":37,"rowKey":"000515451050269844a7d3404871a4137b9db4e24209f1346"}

{"name":"name28","messageId":"51a5f39f032241fcaa6e5956a9ddb474","age":38,"rowKey":"0002154510502798651a5f39f032241fcaa6e5956a9ddb474"}

{"name":"name29","messageId":"c54ec6f46fa047fdae6187afad55e8f0","age":39,"rowKey":"00011545105028986c54ec6f46fa047fdae6187afad55e8f0"}

{"name":"name30","messageId":"076322780dae4748997006d53aa246c1","age":40,"rowKey":"00021545105029987076322780dae4748997006d53aa246c1"}

{"name":"name31","messageId":"563c258840a84f12ad1a5341381b163c","age":41,"rowKey":"00101545105030986563c258840a84f12ad1a5341381b163c"}

{"name":"name32","messageId":"377f3fbe63634fc4aa93b1737dd462da","age":42,"rowKey":"00051545105031988377f3fbe63634fc4aa93b1737dd462da"}

{"name":"name33","messageId":"5c1af0f26cc94d0b85d0fbca617e57ad","age":43,"rowKey":"000715451050329895c1af0f26cc94d0b85d0fbca617e57ad"}

{"name":"name34","messageId":"2d3050a0b3dd42df97ef87d45dbc0d26","age":44,"rowKey":"000715451050339902d3050a0b3dd42df97ef87d45dbc0d26"}

{"name":"name35","messageId":"526401e1f5ab45aeb95421e0ac200e4b","age":45,"rowKey":"00041545105034990526401e1f5ab45aeb95421e0ac200e4b"}总结:

       这种设计的rowkey可以解决热点问题,但是要建立关联表,比如将rowkey保存到数据库或者nosql数据库中,因为前面的regionNo是随机的,不知道 对应数据在hbaserowkey是多少;同一批数据,因为这个regionNo是随机的,所以要到多个regionget数据,不能使用startkeyendkeyget数据。

          2、第二种设计rowkey的方式:通过messageId映射regionNo,这样既可以让数据均匀分布到各个region中,同时可以根据startkeyendkey可以get到同一批数据,messageId映射regionNo,使用一致性hash算法解决,一致性哈希算法在1997年由麻省理工学院的Karger等人在解决分布式Cache中提出的,设计目标是为了解决因特网中的热点(Hot spot)问题,

public class ConsistentHash<T> implements Serializable{

    private static final long serialVersionUID = 1L;

    private  final HashFunction hashFunction;

    //每个regions的虚拟节点个数

    private final int numberOfReplicas;

    //存储虚拟节点的hash值到真实节点的映射

    private final SortedMap<Long, String> circle = new TreeMap<Long, String>();

    public ConsistentHash(HashFunction hashFunction, int numberOfReplicas, Collection<String> nodes) {

        this.hashFunction = hashFunction;

        this.numberOfReplicas = numberOfReplicas;

        for (String node : nodes){

            add(node);

        }

    }

    /**

     * 添加节点

     * @param node

     * @see java.util.TreeMap

     * */

    public void add(String node) {

        for (int i = 0; i < numberOfReplicas; i++)

             /*

              * 不同的虚拟节点(i不同)有不同的hash,但都对应同一个实际机器node

              * 虚拟node一般是均衡分布在环上的,数据存储在顺时针方向的虚拟node

              */

            circle.put(hashFunction.getHashValue(node.toString() + i), node);

    }

    /**

     * 移除节点

     * @param node

     * @see java.util.TreeMap

     * */

    public void remove(String node) {

        for (int i = 0; i < numberOfReplicas; i++)

            circle.remove(hashFunction.getHashValue(node.toString() + i));

    }

    /**

     * 获取对应keyhashcode值,然后根据hashcode获取当前数据储存的真实节点

     * */

    public String get(Object key) {

        if (circle.isEmpty())

            return null;

        //获取对应keyhashcode

        long hash = hashFunction.getHashValue((String) key);

        //数据映射在两台虚拟机器所在环之间,就需要按顺时针方向寻找机器

        if (!circle.containsKey(hash)) {

            SortedMap<Long, String> tailMap = circle.tailMap(hash);

            hash = tailMap.isEmpty() ? circle.firstKey() : tailMap.firstKey();

        }

        return circle.get(hash);

    }

    /**

     * 获取hash环节点大小

     * @return

     * */

    public long getSize() {

        return circle.size();

    }

    /**

     * 获取double类型数据的小数位后四位小数

     * @param num

     * @return

     * */

    public String getDecimalPoint(double num){

        DecimalFormat df = new DecimalFormat("0.0000");

        return df.format(num);

    }

}   

public class HashFunction implements Serializable{

    private static final long serialVersionUID = 1L;

    /**

     * 获取对应字符串的hashCode

     * @param key

     * @return

     * */

    public  long getHashValue(String key) {

        final int p = 1677761999;

        int hash = (int) 216613626111L;

        for (int i = 0; i < key.length(); i++)

            hash = (hash ^ key.charAt(i)) * p;

        hash += hash << 13;

        hash ^= hash >> 8;

        hash += hash << 3;

        hash ^= hash >> 18;

        hash += hash << 5;

        // 如果算出来的值为负数则取其绝对值

        if (hash < 0)

            hash = Math.abs(hash);

        return hash;

    }

}

————————————————

我目前满意第二种方式,然后在es中建立关联表,get数据时,先在esgetrowkey,然后在hbase中获取数据,这个根据自己的业务设计。

写的内容有问题,欢迎来吐槽,我会及时修改,谢谢!

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家小花儿/article/detail/562233
推荐阅读
相关标签
  

闽ICP备14008679号