赞
踩
pg表分区
官网链接:http://www.postgres.cn/docs/11/ddl-partitioning.html
一、声明式划分
PostgreSQL提供了表分区特性。通常支持范围RANGE划分,比如按时间。列表划分、哈希分区。
特点:
a.无法把一个常规表转换成分区表,反之亦然。不过,可以把一个包含数据的常规表或者分区表作为分区加入到另一个分区表,或者从分区表中移走一个分区并且把它变成一个独立的表
b.分区本身也可能被定义为分区表,这种用法被称为子分区。分区可以有自己的与其他分区不同的索引、约束以及默认值
c.在分区表的键列上创建一个索引,还有其他需要的索引(键索引并不是必需的,但是大部分场景中它都能很有帮助)。这会自动在每个分区上创建一个索引,并且后来创建或者附着的任何分区也将会包含索引
d.由于分区表并不直接拥有任何数据,尝试在分区表上使用TRUNCATE
ONLY
将总是返回错误
e .如果NOT NULL
约束在父表中存在,那么就不能删除分区的列上的对应的NOT NULL
约束。分区表的CHECK约束和NOT NULL约束总是会被其所有的分区所继承。不允许在分区表上创建标记为NO INHERIT的CHECK约束。只要分区表中不存在分区,则支持使用ONLY仅在分区表上增加或者删除约束。一旦分区存在,那样做就会导致错误,因为当分区存在时是不支持仅在分区表上增加或删除约束的。不过,分区表本身上的约束可以被增加(如果它们不出现在父表中)和删除。
示例:
1.通过指定PARTITION BY
子句把measurement
表创建为分区表
CREATE TABLE public.measurement (
city_id int4 NOT NULL,
logdate date NOT NULL,
peaktemp int4 NULL,
unitsales int4 NULL
)
PARTITION BY RANGE (logdate);
2.创建分区子表(约束在子表创建,无法在主表创建跨越所有分区的约束)
CREATE TABLE public.measurement_y2006m03 PARTITION OF public.measurement (
CONSTRAINT measurement_y2006m03_pk PRIMARY KEY (city_id)
)FOR VALUES FROM ('2006-03-01') TO ('2006-04-01');
3.索引创建(主表创建,分区也生效)
CREATE INDEX measurement_logdate_idx ON ONLY public.measurement USING btree (logdate);
4.分区维护
--删除掉不再需要的分区
DROP TABLE measurement_y2006m03;
--分区从分区表中移除,但是保留它作为一个独立的表
ALTER TABLE measurement DETACH PARTITION measurement_y2006m02;
--创建默认的表分区(版本11以及以上支持)
create table measurement_default partition of measurement default;
5.存储函数(实战)
创建分区任务
/* -- function: Create partitions for table measurement. -- parameter: -- p_partition_unit: Partition unit. month, day; -- p_partiton_cnt: Count of partitions to be created when this function is called. The default value is 7. -- p_start_utc_time: One UTC timestamp value that is in the range of the first partition's partition-key, in milliseconds. The default value is now. -- return: -- success: 0 -- 模板复用方法: -- 1. measurement 替换为新的表名 -- 2. (city_id) 替换为新的表的 (主键字段) */ create or replace function func_create_partition_for_measurement(p_partition_unit text default 'month', p_partiton_cnt int default 6, p_start_time timestamp default now()) returns int as $body$ declare v_child_tbname text; v_start_time timestamp with time zone; v_end_time timestamp with time zone; v_has_default int default 0; v_parent_tbname text; v_partition_defaultname text; v_partition_defaultname_tmp text; partition_interval interval; time_format text; begin v_parent_tbname := 'measurement'; v_partition_defaultname := v_parent_tbname || '_default'; v_partition_defaultname_tmp := v_partition_defaultname || '_tmp'; --pg_inherits为系统表用来记录表间的继承关系 inhparent为主表oid inhrelid为分区表oid regclass是oid的别名 oid是一个库中对象的唯一标识 perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname; if found then v_has_default := 1; --如果默认分区*_default在系统表中已经存在,解除该表的分区(但是数据未删除) execute 'alter table ' || v_parent_tbname || ' detach partition ' || v_partition_defaultname ; end if; partition_interval := (case p_partition_unit when 'month' then interval '1 month' when 'day' then interval '1 day' end); time_format := (case p_partition_unit when 'month' then 'YYYYMM' when 'day' then 'YYYYMMDD' end); if p_partition_unit in ('month', 'day') then --按照月分割开始时间的时间戳 v_start_time := date_trunc(p_partition_unit, p_start_time); for v_i in 0..(p_partiton_cnt - 1) loop v_child_tbname := v_parent_tbname || '_' || to_char(v_start_time, time_format); perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_child_tbname; if not found then v_end_time := v_start_time + partition_interval; execute 'create table ' || v_child_tbname || ' partition of ' || v_parent_tbname || ' for values from (''' || v_start_time || ''') to (''' || v_end_time || ''')'; execute 'alter table ' || v_child_tbname || ' add constraint pk_' || v_child_tbname || ' primary key (city_id)'; end if; v_start_time := v_start_time + partition_interval; end loop; end if; if v_has_default = 1 then --如果默认分区表存在,把default分区表的数据重新分区 execute 'drop table if exists ' || v_partition_defaultname_tmp; execute 'alter table ' || v_partition_defaultname || ' drop constraint if exists pk_' || v_partition_defaultname; execute 'alter table ' || v_partition_defaultname || ' rename to ' || v_partition_defaultname_tmp ; execute 'create table ' || v_partition_defaultname || ' partition of ' || v_parent_tbname || ' default'; execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (city_id)'; execute 'insert into ' || v_parent_tbname || ' select * from ' || v_partition_defaultname_tmp; execute 'drop table if exists ' || v_partition_defaultname_tmp; else execute 'create table ' || v_partition_defaultname || ' partition of ' || v_parent_tbname || ' default'; execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (city_id)'; end if; return 0; end; $body$ language plpgsql; select func_create_partition_for_measurement('month',2,'2023-02-23')
保存最近?天的数据
/* -- function: Clean old data for measurement -- parameter: -- p_partition_unit: Partition unit. month, day; -- p_keep_days: Duration of data retention, in days. The default value is 90. -- return: -- success: 0 -- 模板复用方法: -- 1. measurement 替换为新的表名 -- 2. logdate 替换为 分区字段。 */ create or replace function func_clean_partition_for_measurement(p_partition_unit varchar default 'month', p_keep_days int default 90) returns int as $body$ declare v_parent_tbname text; v_partition_defaultname text; v_sql text; v_clean_time timestamp with time zone ; v_recd record; v_partiton_colname text; time_format text; begin v_sql = e'select now() - interval \'' || p_keep_days || e' day\''; execute v_sql into v_clean_time; v_parent_tbname := 'measurement'; v_partiton_colname := 'logdate'; v_partition_defaultname := v_parent_tbname || '_default'; time_format := (case p_partition_unit when 'month' then 'YYYYMM' when 'day' then 'YYYYMMDD' end); perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname; if found then execute 'delete from ' || v_partition_defaultname || ' where ' || v_partiton_colname || ' < ' || 'cast(''' || v_clean_time || ''' as timestamp with time zone)'; end if; if p_partition_unit in ('month', 'day') then for v_recd in select cast(cast(inhparent as regclass) as text) as parent_tbname, cast(cast(inhrelid as regclass) as text) as child_tbname from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) < v_parent_tbname || '_' || to_char(v_clean_time, time_format) order by child_tbname asc loop execute 'drop table if exists ' || v_recd.child_tbname; end loop; end if; return 0; end; $body$ language plpgsql; select func_clean_partition_for_measurement('month',90);
模拟数据
--模拟数据 --创建自增序列 create sequence public.id_seq increment by 1 minvalue 1 maxvalue 9223372036854775807 cache 1 no cycle; select nextval('public.id_seq'); with dedate as ( select ( select array_agg(i::date) from generate_series('2008-01-01'::date, '2008-03-01'::date, '+1 day'::interval) as t(i) ) ) insert into product select nextval('public.id_seq') as id, coalesce(( select * from dedate )[(random()* 59)] , '2008-01-01')as logdate, floor(random()* 20)+ 1 as peaktemp, floor(random()* 10000)+ 1 as unitsales from generate_series(1, 1000000, 1);
二、使用继承实现
虽然内建的声明式分区适合于大部分常见的用例,但还是有一些场景需要更加灵活的方法。分区可以使用表继承来实现,这能够带来一些声明式分区不支持的特性,例如:
ACCESS EXCLUSIVE
锁,而在常规继承的情况下一个SHARE UPDATE EXCLUSIVE
锁就足够了。示例:
--创建主表 create table product ( id int not null, logdate date not null, peaktemp int, unitsales int ); --创建子表1 create table public.product_y2008m01 ( constraint product_y2008m01_logdate_check check (((logdate >= '2008-01-01'::date) and (logdate < '2008-02-01'::date))) ) inherits (public.product); create index product_y2008m01_logdate_idx on public.product_y2008m01 using btree (logdate); --创建子表2 create table public.product_y2008m02 ( constraint product_y2008m02_logdate_check check (((logdate >= '2008-02-01'::date) and (logdate < '2008-03-01'::date))) ) inherits (public.product); create index product_y2008m02_logdate_idx on public.product_y2008m02 using btree (logdate); --创建触发器函数 create or replace function product_insert_trigger() returns trigger as $$ begin if ( NEW.logdate >= date '2008-01-01' and NEW.logdate < date '2008-02-01' ) then insert into product_y2008m01 values (NEW.*); elsif ( NEW.logdate >= date '2008-02-01' and NEW.logdate < date '2008-03-01' ) then insert into product_y2008m02 values (NEW.*); else raise exception 'Date out of range. Fix the product_insert_trigger() function!'; end if; return null; end; $$ language plpgsql; --创建触发器 create trigger insert_product_trigger before insert on public.product for each row execute procedure product_insert_trigger();
分区维护
--要快速移除旧数据,只需要简单地去掉不再需要的子表:
DROP TABLE measurement_y2006m02;
--要从继承层次表中去掉子表,但还是把它当做一个表保留:
ALTER TABLE measurement_y2006m02 NO INHERIT measurement;
实战
创建存储函数每月执行建表语句,更新触发器函数
create or replace function func_create_partition_for_product(p_partition_method int default 1, p_partiton_cnt int default 6, p_start_time timestamp default now()) returns int as $body$ declare v_child_tbname text; v_start_time timestamp with time zone; v_end_time timestamp with time zone; v_has_default int default 0; v_parent_tbname text; v_partition_defaultname text; v_partition_defaultname_tmp text; v_partition_unit text; v_partition_interval interval; v_time_format text; begin v_parent_tbname := 'product'; v_partition_defaultname := v_parent_tbname || '_default'; v_partition_defaultname_tmp := v_partition_defaultname || '_tmp'; perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname; if found then v_has_default := 1; execute 'alter table ' || v_partition_defaultname || ' NO INHERIT ' || v_parent_tbname; end if; v_partition_unit := (case p_partition_method when 1 then 'month' when 2 then 'day' end); v_partition_interval := (case p_partition_method when 1 then interval '1 month' when 2 then interval '1 day' end); v_time_format := (case p_partition_method when 1 then 'YYYYMM' when 2 then 'YYYYMMDD' end); if p_partition_method in (1,2) then v_start_time := date_trunc(v_partition_unit, p_start_time); for v_i in 0..(p_partiton_cnt - 1) loop v_child_tbname := v_parent_tbname || '_' || to_char(v_start_time, v_time_format); perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_child_tbname; if not found then v_end_time := v_start_time + v_partition_interval; execute 'create table '||v_child_tbname||'() inherits ('||v_parent_tbname||')'; execute 'ALTER TABLE '||v_child_tbname||' ADD CONSTRAINT '||v_child_tbname||'_check CHECK ((logdate >= ''' || v_start_time || '''::date) and (logdate < ''' || v_end_time || '''::date))'; execute 'ALTER TABLE '||v_child_tbname||' ADD CONSTRAINT '||v_child_tbname||'_pk PRIMARY KEY (id)'; execute 'CREATE UNIQUE INDEX '||v_child_tbname||'_logdate_idx ON '||v_child_tbname||' USING btree (id)'; end if; v_start_time := v_start_time + v_partition_interval; end loop; end if; if v_has_default = 1 then execute 'drop table if exists ' || v_partition_defaultname_tmp; execute 'alter table ' || v_partition_defaultname || ' drop constraint if exists pk_' || v_partition_defaultname; execute 'alter table ' || v_partition_defaultname || ' rename to ' || v_partition_defaultname_tmp ; execute 'create table '||v_partition_defaultname||'() inherits ('||v_parent_tbname||')' ; execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (id)'; execute 'insert into ' || v_parent_tbname || ' select * from ' || v_partition_defaultname_tmp; execute 'drop table if exists ' || v_partition_defaultname_tmp; else execute 'create table '||v_partition_defaultname||'() inherits ('||v_parent_tbname||')' ; execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (id)'; end if; return 0; end; $body$ language plpgsql;
--创建触发器函数 create or replace function product_insert_trigger() returns trigger as $$ declare v_curtime timestamp with time zone; begin v_curtime := date_trunc('month', now()); if ( NEW.logdate >= v_curtime and NEW.logdate < v_curtime + '1 month' ) then execute 'insert into product_' || to_char(v_curtime,'YYYYMM') ||' values ( $1.*)' using NEW; elseif ( NEW.logdate >= v_curtime + '1 month' and NEW.logdate < v_curtime + '2 month' ) then execute 'insert into product_' || to_char(v_curtime + '1 month', 'YYYYMM')|| ' values ($1.*)' using NEW; elseif ( NEW.logdate >= v_curtime + '2 month' and NEW.logdate < v_curtime + '3 month' ) then execute 'insert into product_' || to_char(v_curtime + '2 month', 'YYYYMM')|| ' values ($1.*)' using NEW; else --raise exception 'Date out of range. Fix the product_insert_trigger() function!'; insert into product_default values (NEW.*); end if; return null; end; $$ language plpgsql;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。