时间序列
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
#用excel导入数据, 格式为csv
ori.data <
-
read.csv(
"lesson8.csv"
, header
=
F)
#以矩阵的方式读入数据, 按行排列, 每三列换一行
data <
-
matrix(as.matrix(ori.data), nrow(ori.data)
/
3
,
3
, byrow
=
TRUE)
#关闭区域特定的时间编码方式
Sys.setlocale(
"LC_TIME"
,
"C"
)
#用as.POSIXlt()读入字符串数据并转化为date数据, 赋值给date, 或as.Date()
date <
-
as.POSIXlt(data[,
1
], tz
=
"
", "
%
a
%
b
%
d
%
H:
%
M:
%
S HKT
%
Y")
#对ip和pv所在的列转化为数值型
IP <
-
as.numeric(data[,
2
])
PV <
-
as.numeric(data[,
3
])
head(data)
#恢复区域特地的时间编码方式
Sys.setlocale(
"LC_TIME"
, "")
#用ggplot2绘图
require(ggplot2)
#用reshape包中的melt函数分解数据
require(reshape2)
p.data <
-
data.frame(date, IP, PV)
meltdata <
-
melt(p.data,
id
=
(c(
"date"
)))
#用对IP和PV做分页处理, y轴刻度自由变化
graphic <
-
ggplot(data
=
meltdata, aes(x
=
date, y
=
value, color
=
variable))
+
geom_line()
+
geom_point()
graphic <
-
graphic
+
facet_grid(variable ~ ., scales
=
"free_y"
)
#美化, 添加标题, 坐标, 更改图例
graphic<
-
graphic
+
labs(x
=
"日期"
, y
=
"人次"
, title
=
"某网站7月至10月IP/PV统计"
)
+
theme(plot.title
=
element_text(size
=
20
, face
=
"bold"
))
+
scale_colour_discrete(name
=
"
",labels = c("
IP
","
PV"))
+
theme(strip.text.y
=
element_text(angle
=
0
))
|
地图
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
require(maps)
require(ggplot2)
#用直方图看下pop整体的分布
#可以发现数据分布较变化较大, 所以对pop做log转化
qplot(pop, data
=
us.cities, binwidth
=
0000
, geom
=
"histogram"
)
qplot(log(pop), data
=
us.cities, binwidth
=
0.03
, geom
=
"histogram"
)
#绘制背景地图
USA.POP <
-
ggplot(us.cities, aes(x
=
long
, y
=
lat))
+
xlim(
-
130
,
-
65
)
+
borders(
"state"
, size
=
0.5
)
+
geom_point(aes(size
=
log(pop), color
=
factor(capital), alpha
=
1
/
50
))
+
#对size标度的调整参考http://docs.ggplot2.org/0.9.3.1/scale_size.html
scale_size(
range
=
c(
0
,
7
), name
=
"log(City population)"
)
+
#对离散型颜色变量的标度调整参考http://docs.ggplot2.org/0.9.3.1/scale_manual.html
#对连续型颜色标量的标度调整参考http://docs.ggplot2.org/0.9.3.1/scale_brewer.html
#和http://docs.ggplot2.org/0.9.3.1/scale_gradient2.html
scale_color_manual(values
=
c(
"black"
,
"red"
), labels
=
c(
"state capital"
,
"city"
))
+
#调整图例
guides(color
=
guide_legend(title
=
NULL))
+
scale_alpha(guide
=
FALSE)
+
#绘制标题和坐标轴
labs(x
=
"longtitude"
, y
=
"latitude"
, title
=
"City Population in the United States"
)
+
theme(plot.title
=
element_text(size
=
20
))
#输出图像 并用cairo包进行抗锯齿处理
ggsave(USA.POP,
file
=
"USA_POP.png"
,
type
=
"cairo"
, width
=
10
, height
=
6.75
)
|
当然, 这只是简单的地图绘制方法,统计之都上也有很多大牛来用R绘制各种各样精美的地图(1, 2)。
剂量-效应曲线
R中的drc包很容易对各种剂量-效应曲线进行绘图, 此处采用较为常用的log-logistic四参数方程拟合了剂量-效应曲线。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
ori.data <
-
read.csv(
"D-R curve.csv"
)
require(drc)
require(reshape2)
#把数据融合
melt.data <
-
melt(ori.data,
id
=
c(
"dose"
), value.name
=
"response"
)[,
-
2
]
#用drc包中的log-logistic四参数方程进行拟合建模
model <
-
drm(response ~ dose, data
=
melt.data, fct
=
LL.
4
(names
=
c(
"Slope"
,
"Lower Limit"
,
"Upper Limit"
,
"EC50"
)))
#确定x轴范围并构建数据集
min
<
-
range
(ori.data$dose)[
1
]
max
<
-
range
(ori.data$dose)[
2
]
line.data <
-
data.frame(d.predict
=
seq(
min
,
max
, length.out
=
1000
))
#用模型预测数据构建数据集
line.data$p.predict <
-
predict(model, newdata
=
line.data)
#构建绘图数据, 能够计算误差棒
require(plyr)
p.data <
-
ddply(melt.data, .(dose), colwise(mean))
p.data$sd <
-
ddply(melt.data, .(dose), colwise(sd))[,
2
]
require(ggplot2)
p <
-
ggplot()
+
geom_errorbar(data
=
p.data, width
=
0.1
, size
=
1
,
aes(ymax
=
response
+
sd, ymin
=
response
-
sd, x
=
dose))
+
geom_point(data
=
p.data, aes(x
=
dose, y
=
response),
color
=
"red"
, alpha
=
0.5
, size
=
5
)
+
geom_line(data
=
line.data, aes(x
=
d.predict, y
=
p.predict),
size
=
1
, color
=
"blue"
)
+
#改变坐标轴间隔
scale_x_log10(name
=
"Dose"
,
breaks
=
c(
0.05
,
0.1
,
0.5
,
1
,
5
,
10
,
50
,
100
))
+
scale_y_continuous(name
=
"Response"
)
+
theme_bw()
#查看拟合模型参数
summary(model)
|