GreenPlum中的分区表在数据量较大的情况下对于提升查询性能的帮助非常的,但是GreenPlum本身并没有提供分区表自动维护的工具,这里我们利用GreenPlum的PL/SQL自定义两个分区表自动维护的存储过程(也可以成为函数)。
创建存储过程之前首先要创建一个记录分区表详细信息的视图,这里可以参见上篇博文。由于业务中有多张表需要做分区,而且分区字段的类型并不一样,因此我们首先创建一张字典表,记录每张表的分区类型,如下:
- CREATE TABLE op_tb_partition (
- tb_name varchar(100) DEFAULT NULL,
- timetype varchar(8) DEFAULT NULL
- )DISTRIBUTED BY (tb_name);
-
- select * from op_tb_partition;
- tb_name | timetype
- -----------------------------+----------
- nl_app_action_error_trace | unixtime
- nl_mob_app_anr_data | usertime
- nl_mob_app_error_trace_test | datetime
- (3 rows)
三张表的表结构以及分区分析如下:
- create table nl_app_action_error_trace (
- id bigserial not null,
- timestamp int not null,
- application_id int not null,
- application_instance_id int not null,
- action_id int not null,
- action_type int not null,
- key_action_id int not null,
- error_type int not null,
- error_message varchar(1025) default null,
- error_count int not null,
- request_uri varchar(1024) default null,
- trace_data varchar(512) default null
- )WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,application_id)
- PARTITION BY RANGE (timestamp)
- (
- partition p20160701 start (1467302400::int) end (1467388800::int),
- partition p20160702 start (1467388800::int) end (1467475200::int)
- );
-
-
- create table nl_mob_app_error_trace_test (
- id bigserial not null,
- timestamp timestamp not null,
- mobile_app_id int not null,
- mobile_app_version_id int not null,
- manufacturer_id int not null,
- manufacturer_model_id int not null,
- os_id int not null,
- os_version_id int not null,
- agent_version_id int not null default 0,
- country_id int not null,
- region_id int not null,
- carrier_id int not null,
- connect_type_id int not null,
- key_url_id int not null,
- ip bigint not null default 0,
- host_id int not null,
- host_ip bigint not null default 0,
- error_type int not null,
- error_code int not null,
- error_count int not null,
- request_url_id int not null,
- request_url varchar(1024) DEFAULT NULL,
- trace_data varchar(512) DEFAULT NULL
- )WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,mobile_app_id)
- PARTITION BY RANGE (timestamp)
- (
- partition p20160701 start ('2016-07-01'::date) end ('2016-07-02'::date),
- partition p20160702 start ('2016-07-02'::date) end ('2016-07-03'::date)
- );
-
- CREATE TABLE NL_MOB_APP_ANR_DATA (
- id bigserial NOT NULL ,
- timestamp int NOT NULL,
- timestamp_anr int NOT NULL,
- mobile_app_id int NOT NULL,
- mobile_app_version_id int NOT NULL,
- manufacturer_id int NOT NULL,
- manufacturer_model_id int NOT NULL,
- os_id int NOT NULL,
- os_version_id int NOT NULL,
- carrier_id int NOT NULL,
- connect_type_id int NOT NULL,
- agent_version_id int NOT NULL,
- device_id bigint NOT NULL,
- mobile_anr_id bigint NOT NULL DEFAULT 0,
- anr_message varchar(1024) DEFAULT NULL,
- stacktrace varchar(512) DEFAULT NULL ,
- stacktrace_deobfuscated varchar(512) DEFAULT NULL,
- anr_trace_file varchar(512) DEFAULT NULL,
- anr_system_log varchar(512) DEFAULT NULL,
- additional_info varchar(512) DEFAULT NULL
- )WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,mobile_app_id)
- PARTITION BY RANGE (timestamp)
- (
- partition p20160701 start (1838400::int) end (1839840::int),
- partition p20160702 start (1839840::int) end (1841280::int)
- );
创建添加分区的存储过程,GreenPlum中单引号的转义符为两个单引号,详细代码如下:
- create or replace function add_partition_day()
- returns text as
- $$
- declare tb_options record;
- declare curr_part varchar(8);
- declare max_part varchar(20);
- declare part_name varchar(9);
- declare end_part varchar(8);
- declare start_range int;
- declare end_range int;
- declare x int;
- begin
- for tb_options in select * from op_tb_partition group by tb_name,timetype loop
- if tb_options.timetype = 'datetime' then
- select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
- select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
- select date(max_part) - date(curr_part) into x;
- while x > 0 loop
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
- select 'p' || curr_part into part_name;
- execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (''' || curr_part || '''::date) end (''' || end_part || '''::date) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
- x = x-1;
- end loop;
- end_part = '';
- end_range = 0;
- elsif tb_options.timetype = 'usertime' then
- select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
- select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
- select date(max_part) - date(curr_part) into x;
- while x > 0 loop
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
- select nl_to_timestamp(date(curr_part)) into start_range;
- select nl_to_timestamp(date(end_part)) into end_range;
- select 'p' || curr_part into part_name;
- execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (' || start_range || '::int) end (' || end_range || '::int) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
- -- alter table tb_options.tb_name add partition part_name start (start_range::int) end (end_range::int);
- x = x-1;
- end loop;
- end_part = '';
- end_range = 0;
- elsif tb_options.timetype = 'unixtime' then
- select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
- select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
- select date(max_part) - date(curr_part) into x;
- while x > 0 loop
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
- select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
- select unix_timestamp(date(curr_part)) into start_range;
- select unix_timestamp(date(end_part)) into end_range;
- select 'p' || curr_part into part_name;
- execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (' || start_range || '::int) end (' || end_range || '::int) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
- -- alter table tb_options.tb_name add partition part_name start (start_range::int) end (end_range::int);
- x = x-1;
- end loop;
- end_part = '';
- end_range = 0;
- end if;
- end loop;
- return 'ok';
- end;
- $$
- LANGUAGE plpgsql;
创建删除分区的存储过程,这里的数据保存3个月,详细代码如下:
- create or replace function drop_partition_day()
- returns text as
- $$
- declare tb_options record;
- declare const_part int default 90+7;
- declare part_count int;
- declare min_part_name varchar(255);
- declare x int;
- begin
- for tb_options in select * from op_tb_partition group by tb_name,timetype loop
- select count(*) into part_count from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname = tb_options.tb_name;
- select part_count - const_part into x;
- while x > 0 loop
- select min(a.partition_name) into min_part_name from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname = tb_options.tb_name;
- execute 'alter table ' || tb_options.tb_name || ' drop partition ' || min_part_name;
- x = x -1;
- end loop;
- part_count = 0;
- min_part_name = '';
- x = 0;
- end loop;
- return 'ok';
- end;
- $$
- LANGUAGE plpgsql;
调用声明好的存储过程,如下:
- testdb=# select add_partition_day();
- add_partition_day
- -------------------
- ok
- (1 row)
-
- testdb=# select drop_partition_day();
- drop_partition_day
- --------------------
- ok
- (1 row)