赞
踩
PostgreSQL , 索引 , 所有字段 , 并行创建单个索引 , max_parallel_maintenance_workers , 异步调用 , dblink , 并行创建多个索引 , adhoc查询
PostgreSQL 支持丰富的类型、索引,统计信息。
不同的应用场合,数据类型,可使用不同的索引接口(btree,hash,gin,gist,spgist,brin,bloom等)达到不同的效果。
https://www.postgresql.org/docs/11/static/datatype.html
https://www.postgresql.org/docs/11/static/functions.html
https://www.postgresql.org/docs/11/static/indexes.html
在某些业务场合,业务上可能需要对任意字段组合进行查询,可以使用gin或rum 联合索引接口(把需要搜索的字段都塞到一个索引里面,内部使用bitmap scan),但是这些索引创建速度比较慢,并且索引最多只能放32列(硬编码限制 src/include/pg_config_manual.h INDEX_MAX_KEYS)。
所以另一种选择是使用单列独立索引,当单列独立索引过滤性不够好时,数据库自动会选择多个索引做bitmap 合并扫描.
《PostgreSQL bitmapAnd, bitmapOr, bitmap index scan, bitmap heap scan》
那么每一列应该使用什么索引接口?
除了取决于数据类型,还取决于数据的分布,柱状图。
《PostgreSQL SQL自动优化案例 - 极简,自动推荐索引》
《自动选择正确索引访问接口(btree,hash,gin,gist,sp-gist,brin,bitmap...)的方法》
https://www.postgresql.org/docs/11/static/catalog-pg-type.html#CATALOG-TYPCATEGORY-TABLE
Code | Category | 推荐 索引接口 |
---|---|---|
A | Array types | gin |
B | Boolean types | btree , 不建议加索引,选择性不好 |
C | Composite types | - |
D | Date/time types | btree |
E | Enum types | - |
G | Geometric types | gist |
I | Network address types | gist, spgist |
N | Numeric types | btree |
P | Pseudo-types | - |
R | Range types | gist, spgist |
S | String types | btree : varchar text_pattern_ops , bpchar bpchar_pattern_ops ; 模糊查询(pg_trgm) gin : varchar gin_trgm_ops |
T | Timespan types | btree |
U | User-defined types | - |
V | Bit-string types | - |
X | unknown type | - |
select typcategory , typname from pg_type order by 1,2;
Code | Category | 推荐 索引接口 |
---|---|---|
U | aclitem | - |
U | box2d | - |
U | box2df | - |
U | box3d | - |
U | bytea | - |
U | cid | - |
U | gbtreekey16 | - |
U | gbtreekey32 | - |
U | gbtreekey4 | - |
U | gbtreekey8 | - |
U | gbtreekey_var | - |
U | geography | gist |
U | geometry | gist |
U | gidx | - |
U | gtrgm | - |
U | gtsvector | - |
U | json | gin |
U | jsonb | gin , OPS : jsonb_path_ops |
U | macaddr | - |
U | macaddr8 | - |
U | pg_lsn | - |
U | pgis_abs | - |
U | raster | - |
U | refcursor | - |
U | smgr | - |
U | spheroid | - |
U | tid | - |
U | tsquery | gin, rum |
U | tsvector | gin, rum |
U | txid_snapshot | - |
U | uuid | hash |
U | xid | btree |
U | xml | - |
写一个UDF函数,将以上规格写到UDF里面,自动生成每一列的索引SQL,自动使用合适的索引方法,OPS。
- create or replace function gen_whole_index_sqls(
- v_nsp name,
- v_tbl name,
- v_tbs name
- ) returns text[] as $$
- declare
- v_attname name;
- v_typid oid;
- v_typca "char";
- v_typname name;
- res text[];
- idxprefix text := to_char(clock_timestamp(),'yyyymmddhh24miss');
- idxsuffix int := 1;
- sql text := 'create index IF NOT EXISTS i'||idxprefix||'_%s on '||quote_ident(v_nsp)||'.'||quote_ident(v_tbl)||' using %s (%I %s) tablespace '||quote_ident(v_tbs)||' ;';
- begin
-
- for v_attname,v_typid in select attname,atttypid from pg_attribute where not attisdropped and attnum >= 1 and attrelid=(quote_ident(v_nsp)||'.'||quote_ident(v_tbl))::regclass
- loop
-
- select typcategory,typname into v_typca,v_typname from pg_type where oid=v_typid;
-
- case v_typca
- when 'A' then
- res := array_append(res, format(sql,idxsuffix,'gin',v_attname,''));
- when 'D', 'N', 'T' then
- res := array_append(res, format(sql,idxsuffix,'btree',v_attname,''));
- when 'S' then
- if v_typname='text' or v_typname='varchar' then
- res := array_append(res, format(sql,idxsuffix,'btree',v_attname,'text_pattern_ops'));
- elsif v_typname='bpchar' then
- res := array_append(res, format(sql,idxsuffix,'btree',v_attname,'bpchar_pattern_ops'));
- else
- res := array_append(res, format(sql,idxsuffix,'btree',v_attname,''));
- end if;
- -- 如果字符串要支持模糊查询,使用gin索引
- -- if v_typname='text' or v_typname='varchar' then
- -- res := array_append(res, format(sql,idxsuffix,'gin',v_attname,'gin_trgm_ops'));
- -- else
- -- res := array_append(res, format(sql,idxsuffix,'btree',v_attname,''));
- -- end if;
- when 'G' then
- if v_typname not in ('line') then
- res := array_append(res, format(sql,idxsuffix,'gist',v_attname,''));
- else
- continue;
- end if;
- when 'I', 'R' then
- res := array_append(res, format(sql,idxsuffix,'gist',v_attname,''));
- -- 可选spgist
- -- res := array_append(res, format(sql,idxsuffix,'spgist',v_attname,''));
- when 'U' then
- case v_typname
- when 'geography', 'geometry' then
- res := array_append(res, format(sql,idxsuffix,'gist',v_attname,''));
- when 'jsonb' then
- res := array_append(res, format(sql,idxsuffix,'gin',v_attname,'jsonb_path_ops'));
- -- 可选默认gin ops
- -- https://www.postgresql.org/docs/11/static/datatype-json.html#JSON-INDEXING
- -- res := array_append(res, format(sql,idxsuffix,'gin',v_attname,''));
- when 'tsvector' then
- res := array_append(res, format(sql,idxsuffix,'gin',v_attname,''));
- when 'uuid', 'xid' then
- res := array_append(res, format(sql,idxsuffix,'hash',v_attname,''));
- else
- continue;
- end case;
- else
- continue;
- end case;
-
- idxsuffix := idxsuffix+1;
-
- end loop;
-
- return res;
-
- end;
- $$ language plpgsql strict;
1、创建测试表,包含各种数据类型
- create table "你好t12" (
- c1 int,
- "-_c2&a-b" int8,
- c3 text,
- c4 varchar(1000),
- c5 char(1000),
- c6 "char",
- c7 timestamp,
- c8 interval,
- c9 int[],
- c10 tsvector,
- c11 tsquery,
- c12 time,
- c13 date,
- c14 numeric,
- c15 float,
- c16 point,
- c17 box,
- c18 line,
- c19 circle,
- c20 inet,
- c21 cidr,
- c22 int8range,
- c23 tsrange,
- c24 geometry,
- c25 geography,
- c26 uuid,
- c27 xid,
- c28 json,
- c29 jsonb
- );
2、使用本文提供的UDF,生成CREATE INDEX SQL
- select * from unnest(gen_whole_index_sqls('public','你好t12','pg_default'));
-
- unnest
- ------------------------------------------------------------------------------------------------------------------------------
- create index IF NOT EXISTS i20180903171836_1 on public."你好t12" using btree (c1 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_2 on public."你好t12" using btree ("-_c2&a-b" ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_3 on public."你好t12" using btree (c3 text_pattern_ops) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_4 on public."你好t12" using btree (c4 text_pattern_ops) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_5 on public."你好t12" using btree (c5 bpchar_pattern_ops) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_6 on public."你好t12" using btree (c6 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_7 on public."你好t12" using btree (c7 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_8 on public."你好t12" using btree (c8 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_9 on public."你好t12" using gin (c9 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_10 on public."你好t12" using gin (c10 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_11 on public."你好t12" using btree (c12 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_12 on public."你好t12" using btree (c13 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_13 on public."你好t12" using btree (c14 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_14 on public."你好t12" using btree (c15 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_15 on public."你好t12" using gist (c16 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_16 on public."你好t12" using gist (c17 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_17 on public."你好t12" using gist (c19 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_18 on public."你好t12" using gist (c20 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_19 on public."你好t12" using gist (c21 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_20 on public."你好t12" using gist (c22 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_21 on public."你好t12" using gist (c23 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_22 on public."你好t12" using gist (c24 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_23 on public."你好t12" using gist (c25 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_24 on public."你好t12" using hash (c26 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_25 on public."你好t12" using hash (c27 ) tablespace pg_default ;
- create index IF NOT EXISTS i20180903171836_26 on public."你好t12" using gin (c29 jsonb_path_ops) tablespace pg_default ;
- (26 rows)
3、创建索引测试
使用之前提到的并行跑后台任务的方法,并行创建多个索引,充分利用硬件资源加速。
《PostgreSQL dblink异步调用实践,跑并行多任务 - 例如开N个并行后台任务创建索引, 开N个后台任务跑若干SQL》
- select * from run_sqls_parallel(6, gen_whole_index_sqls('public','你好t12','pg_default')) as t(a text);
-
-
- NOTICE: the last 3 tasks running.
- NOTICE: whole tasks done.
- run_sqls_parallel
- -------------------
-
- (1 row)
4、检验
- postgres=# \d 你好t12
- Table "public.你好t12"
- Column | Type | Collation | Nullable | Default
- ----------+-----------------------------+-----------+----------+---------
- c1 | integer | | |
- -_c2&a-b | bigint | | |
- c3 | text | | |
- c4 | character varying(1000) | | |
- c5 | character(1000) | | |
- c6 | "char" | | |
- c7 | timestamp without time zone | | |
- c8 | interval | | |
- c9 | integer[] | | |
- c10 | tsvector | | |
- c11 | tsquery | | |
- c12 | time without time zone | | |
- c13 | date | | |
- c14 | numeric | | |
- c15 | double precision | | |
- c16 | point | | |
- c17 | box | | |
- c18 | line | | |
- c19 | circle | | |
- c20 | inet | | |
- c21 | cidr | | |
- c22 | int8range | | |
- c23 | tsrange | | |
- c24 | geometry | | |
- c25 | geography | | |
- c26 | uuid | | |
- c27 | xid | | |
- c28 | json | | |
- c29 | jsonb | | |
- Indexes:
- "i20180903171855_1" btree (c1)
- "i20180903171855_10" gin (c10)
- "i20180903171855_11" btree (c12)
- "i20180903171855_12" btree (c13)
- "i20180903171855_13" btree (c14)
- "i20180903171855_14" btree (c15)
- "i20180903171855_15" gist (c16)
- "i20180903171855_16" gist (c17)
- "i20180903171855_17" gist (c19)
- "i20180903171855_18" gist (c20)
- "i20180903171855_19" gist (c21)
- "i20180903171855_2" btree ("-_c2&a-b")
- "i20180903171855_20" gist (c22)
- "i20180903171855_21" gist (c23)
- "i20180903171855_22" gist (c24)
- "i20180903171855_23" gist (c25)
- "i20180903171855_24" hash (c26)
- "i20180903171855_25" hash (c27)
- "i20180903171855_26" gin (c29 jsonb_path_ops)
- "i20180903171855_3" btree (c3 text_pattern_ops)
- "i20180903171855_4" btree (c4 text_pattern_ops)
- "i20180903171855_5" btree (c5 bpchar_pattern_ops)
- "i20180903171855_6" btree (c6)
- "i20180903171855_7" btree (c7)
- "i20180903171855_8" btree (c8)
- "i20180903171855_9" gin (c9)
- postgres=# \di i20180903171855_*
- List of relations
- Schema | Name | Type | Owner | Table
- --------+--------------------+-------+----------+---------
- public | i20180903171855_1 | index | postgres | 你好t12
- public | i20180903171855_10 | index | postgres | 你好t12
- public | i20180903171855_11 | index | postgres | 你好t12
- public | i20180903171855_12 | index | postgres | 你好t12
- public | i20180903171855_13 | index | postgres | 你好t12
- public | i20180903171855_14 | index | postgres | 你好t12
- public | i20180903171855_15 | index | postgres | 你好t12
- public | i20180903171855_16 | index | postgres | 你好t12
- public | i20180903171855_17 | index | postgres | 你好t12
- public | i20180903171855_18 | index | postgres | 你好t12
- public | i20180903171855_19 | index | postgres | 你好t12
- public | i20180903171855_2 | index | postgres | 你好t12
- public | i20180903171855_20 | index | postgres | 你好t12
- public | i20180903171855_21 | index | postgres | 你好t12
- public | i20180903171855_22 | index | postgres | 你好t12
- public | i20180903171855_23 | index | postgres | 你好t12
- public | i20180903171855_24 | index | postgres | 你好t12
- public | i20180903171855_25 | index | postgres | 你好t12
- public | i20180903171855_26 | index | postgres | 你好t12
- public | i20180903171855_3 | index | postgres | 你好t12
- public | i20180903171855_4 | index | postgres | 你好t12
- public | i20180903171855_5 | index | postgres | 你好t12
- public | i20180903171855_6 | index | postgres | 你好t12
- public | i20180903171855_7 | index | postgres | 你好t12
- public | i20180903171855_8 | index | postgres | 你好t12
- public | i20180903171855_9 | index | postgres | 你好t12
- (26 rows)
1、本文提供了一个UDF,用于生成创建索引的SQL(返回SQL数组)
gen_whole_index_sqls('name space','表名','表空间名')
2、使用之前提到的并行跑后台任务的方法,并行创建多个索引,充分利用硬件资源加速。
《PostgreSQL dblink异步调用实践,跑并行多任务 - 例如开N个并行后台任务创建索引, 开N个后台任务跑若干SQL》
例如
select * from run_sqls_parallel(6, gen_whole_index_sqls('public','你好t12','pg_default')) as t(a text);
3、结合表的统计信息(analyze table后),可以把生成CREATE INDEX SQL做得更加完美。
《自动选择正确索引访问接口(btree,hash,gin,gist,sp-gist,brin,bitmap...)的方法》
https://www.postgresql.org/docs/11/static/plpgsql-control-structures.html#PLPGSQL-FOREACH-ARRAY
《PostgreSQL dblink异步调用实践,跑并行多任务 - 例如开N个并行后台任务创建索引, 开N个后台任务跑若干SQL》
《PostgreSQL SQL自动优化案例 - 极简,自动推荐索引》
《自动选择正确索引访问接口(btree,hash,gin,gist,sp-gist,brin,bitmap...)的方法》
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。