赞
踩
经常被问到为什么模糊匹配不走索引,验证几种情况(Pg9.4.18)
create table test(id int, info text);
insert into test select generate_series(1,1000000),md5(random()::text);
create index idx on test(info collate "C");
规则
索引、查询条件的collate必须一致才能使用索引。
collate<>"C"的话,使用默认的索引ops是不能走索引的需要制定对应类型的ops,例如
text_pattern_ops
varchar_pattern_ops
bpchar_pattern_ops
test01 走索引
create table testc(id int, info text collate "C");
insert into testc select generate_series(1,1000000),md5(random()::text);
create index idxc on testc(info collate "C");
explain (analyze,verbose,timing,costs,buffers) select * from testc where info like 'abcd%' collate "C";
test02 不走索引
create table testcz(id int, info text);
insert into testcz select generate_series(1,1000000),md5(random()::text);
create index idxcz on testcz(info collate "zh_CN");
explain (analyze,verbose,timing,costs,buffers) select * from testcz where info like 'abcd%' collate "zh_CN";
test03 使用对应的pattern ops走索引
create table testcp (id int, info text);
insert into testcp select generate_series(1,1000000),md5(random()::text);
create index indexcp on testcp(info text_pattern_ops);
explain (analyze,verbose,timing,costs,buffers) select * from testcp where info like 'abcd%';
test 04 使用对应的pattern ops走索引在zh_CN也是列时也走索引
create table testcc (id int, info text collate "zh_CN");
insert into testcc select generate_series(1,1000000),md5(random()::text);
create index idxcc on testcc(info text_pattern_ops);
explain (analyze,verbose,timing,costs,buffers) select * from testcc where info like 'abcd%';
test05 lc_collate<>C时不走索引,需要走索引的话使用对应的pattern ops
create table testcn (id int, info text collate "zh_CN");
insert into testcn select generate_series(1,1000000),md5(random()::text);
create index idxcn on testcn(info collate "zh_CN");
explain (analyze,verbose,timing,costs,buffers) select * from testcn where info like 'abcd%';
reverse()后同前模糊
test01 collate<>“C” 使用对应的text_pattern_ops可以搞定
drop table test;
create table test(id int, info text);
create index idx on test(info collate "C");
create index idx1 on test(reverse(info) collate "C");
insert into test select generate_series(1,1000000),md5(random()::text);
explain (analyze,verbose,timing,costs,buffers) select * from test where reverse(info) like '4152%' collate "C";
create table testz(id int, info text collate "zh_CN");
create index idxz on testz(reverse(info) text_pattern_ops);
insert into testz select generate_series(1,1000000),md5(random()::text);
explain (analyze,verbose,timing,costs,buffers) select * from testz where reverse(info) like '4152%';
test01 走索引
drop table test;
create extension pg_trgm;
create table test(id int, info text);
create index idx_test_text on test using gin (info gin_trgm_ops);
insert into test select generate_series(1,1000000),md5(random()::text);
explain (analyze,verbose,timing,costs,buffers) select * from test where info like '%4152%';
规则
test01
DB : test06 | pg9002311 | UTF8 | zh_CN.utf8 | zh_CN.utf8
create or replace function gen_hanzi(int) returns text as $$ declare res text; begin if $1 >=1 then select string_agg(chr(19968+(random()*20901)::int), '') into res from generate_series(1,$1); return res; end if; return null; end; $$ language plpgsql strict; create extension pg_trgm; create table test001(c1 text); insert into test001 select gen_hanzi(20) from generate_series(1,100000); create index idx_test001_1 on test001 using gin (c1 gin_trgm_ops);
走索引:性能没问题
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '你%'; --0.083 ms
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '%恧顤'; --0.039 ms
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '%燋邢賀%'; --0.042 ms
走索引:有性能问题!
-- DB : postgres | pg9002311 | UTF8 | C | C
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '你%'; --45.050 ms
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '%恧顤'; --61.116 ms
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '%燋邢賀%'; --60.768 ms
-- "C" 建表的时候制定collate "zh_CN"
explain (analyze,verbose,timing,costs,buffers) select * from test001 where c1 like '你%'; --46.050 ms
参考德哥文章,原文地址
https://github.com/digoal/blog/blob/master/201704/20170426_01.md
附:
测试这样的数据库 zfba=# \l List of databases Name | Owner | Encoding | Collation | Ctype | Access privileges -----------+---------+----------+-------------+-------------+--------------------- zfba | zfba | UTF8 | zh_CN.UTF-8 | zh_CN.UTF-8 | create database test06 with encoding 'UTF-8' lc_collate='zh_CN.utf8' lc_ctype='zh_CN.utf8' template template0; test06=# \l+ List of databases Name | Owner | Encoding | Collation | Ctype | -----------+-----------+----------+------------+------------+ test06 | pg9002311 | UTF8 | zh_CN.utf8 | zh_CN.utf8 | (5 rows) create table testcn (id int, info text); insert into testcn select generate_series(1,1000000),md5(random()::text); create index idxcn on testcn(info); -- 不走索引! explain (analyze,verbose,timing,costs,buffers) select * from testcn where info like 'abcd%'; -- 走索引! create index idxcx on testcn(info text_pattern_ops); explain (analyze,verbose,timing,costs,buffers) select * from testcn where info like 'abcd%';
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。