赞
踩
该数据集包含有关英格兰和威尔士自1995年起到2023年的房地产价格的数据,超过2800万条记录,未压缩形式的数据集大小超过4GB,在ClickHouse中需要约306MB。
- create table uk_price_paid(
- id string,
- price int,
- trans_date date,
- postcode string,
- type string,
- is_new string,
- duration string,
- addr1 string,
- addr2 string,
- street string,
- locality string,
- town string,
- district string,
- county string,
- category string
- )
- row format delimited fields terminated by '#' lines terminated by '\n' stored as textfile;
查看表
由于数据中有部分字段包含逗号‘,’,所以数据要预处理一下。
- # 查看文件中包含某个字符
- cat pp-complete.csv|grep '#'
- # 全文修改一个字符到另一个字符
- sed 's/","/"#"/g' pp-complete.csv > pp-complete-ext.csv
- sed 's/"#"/#/g' pp-complete.csv > pp-complete3.csv
load data local inpath '/home/datasets/pp-complete4.csv' into table uk_price_paid;
导入成功。
查看数据
select * from uk_price_paid limit 10;
select count(*) from uk_price_paid;
SELECT year(trans_date) year1, round(avg(price)) price from uk_price_paid GROUP BY year(trans_date) ORDER BY year(trans_date);
SELECT year(trans_date) as year, round(avg(price)) AS price FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year(trans_date) ORDER BY year(trans_date);
- SELECT town, district, count() as c, round(avg(price)) AS price
- FROM uk_price_paid
- WHERE date >= '2020-01-01'
- GROUP BY town, district
- HAVING c >= 100
- ORDER BY price DESC
- LIMIT 10;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。