我正在尝试使用 reshape2
来格式化这个 90-observation dstat数据,以便我可以以合理的方式绘制它,但我无法弄清楚如何将所有 foo.5
列转换为我可以使用 ggplot2 轻松绘制的内容。
> library(RCurl)
> x <- getURL("https://gist.githubusercontent.com/hamiltont/27d9f93ad53d8978bccd/raw/ec6118805b9ab8606a303dc81450a6f760864cf9/dstat.csv")
> dstat = read.csv(file="stats", skip=6, header=TRUE)
> names(dstat)
[1] "usr" "sys" "idl" "wai" "hiq"
[6] "siq" "usr.1" "sys.1" "idl.1" "wai.1"
[11] "hiq.1" "siq.1" "usr.2" "sys.2" "idl.2"
[16] "wai.2" "hiq.2" "siq.2" "usr.3" "sys.3"
[21] "idl.3" "wai.3" "hiq.3" "siq.3" "usr.4"
[26] "sys.4" "idl.4" "wai.4" "hiq.4" "siq.4"
[31] "usr.5" "sys.5" "idl.5" "wai.5" "hiq.5"
[36] "siq.5" "usr.6" "sys.6" "idl.6" "wai.6"
[41] "hiq.6" "siq.6" "usr.7" "sys.7" "idl.7"
[46] "wai.7" "hiq.7" "siq.7" "usr.8" "sys.8"
[51] "idl.8" "wai.8" "hiq.8" "siq.8" "usr.9"
[56] "sys.9" "idl.9" "wai.9" "hiq.9" "siq.9"
[61] "usr.10" "sys.10" "idl.10" "wai.10" "hiq.10"
[66] "siq.10" "usr.11" "sys.11" "idl.11" "wai.11"
[71] "hiq.11" "siq.11" "usr.12" "sys.12" "idl.12"
[76] "wai.12" "hiq.12" "siq.12" "usr.13" "sys.13"
[81] "idl.13" "wai.13" "hiq.13" "siq.13" "usr.14"
[86] "sys.14" "idl.14" "wai.14" "hiq.14" "siq.14"
[91] "usr.15" "sys.15" "idl.15" "wai.15" "hiq.15"
[96] "siq.15" "usr.16" "sys.16" "idl.16" "wai.16"
[101] "hiq.16" "siq.16" "usr.17" "sys.17" "idl.17"
[106] "wai.17" "hiq.17" "siq.17" "usr.18" "sys.18"
[111] "idl.18" "wai.18" "hiq.18" "siq.18" "usr.19"
[116] "sys.19" "idl.19" "wai.19" "hiq.19" "siq.19"
[121] "usr.20" "sys.20" "idl.20" "wai.20" "hiq.20"
[126] "siq.20" "usr.21" "sys.21" "idl.21" "wai.21"
[131] "hiq.21" "siq.21" "usr.22" "sys.22" "idl.22"
[136] "wai.22" "hiq.22" "siq.22" "usr.23" "sys.23"
[141] "idl.23" "wai.23" "hiq.23" "siq.23" "read"
[146] "writ" "recv" "send" "recv.1" "send.1"
[151] "recv.2" "send.2" "in." "out" "int"
[156] "csw" "X20" "X21" "X23" "X76"
[161] "X77" "X78" "X79" "X80" "X81"
[166] "X82" "X83" "X1m" "X5m" "X15m"
[171] "used" "buff" "cach" "free" "run"
[176] "blk" "new" "read.1" "writ.1" "used.1"
[181] "free.1" "epoch" "X.aio" "files" "inodes"
[186] "msg" "sem" "shm" "pos" "lck"
[191] "rea" "wri" "raw" "tot" "tcp"
[196] "udp" "raw.1" "frg" "lis" "act"
[201] "syn" "tim" "clo" "raw.2" "tot.1"
[206] "tcp.1" "udp.1" "raw.3" "frg.1" "lis.1"
[211] "act.1" "syn.1" "tim.1" "clo.1" "lis.2"
[216] "act.2" "dgm" "str" "lis.3" "act.3"
[221] "majpf" "minpf" "alloc" "free.2" "util"
[226] "call" "retr" "refr" "call.1" "erca"
[231] "erau" "ercl" "xdrc"
对于我的第一个情节,我只是喜欢将所有 usr.*
列显示为不同颜色的线条,我会使用这样的东西来做:
ggplot(dstat, aes(x=observation, y=usr, color=usr_type) +
geom_line()
如果我能让我的数据框看起来像这样:
observation | usr_type | value
1 usr 22
1 usr.1 32
1 usr.2 12
<snip>
2 usr 21
2 usr.1 20
2 usr.2 20
请您参考如下方法:
有几种方法可以做到这一点,但我建议使用我的“splitstackshape”包中的Stacked
。
基本的方法是这样的:
library(splitstackshape) ## Also loads "data.table"
DT <- as.data.table(dstat, keep.rownames = TRUE)
Stacked(DT, id.vars = "rn", var.stubs = "usr",
sep = "var.stubs", keep.all = FALSE)
# rn .time_1 usr
# 1: 1 0.027
# 2: 1 .1 8.068
# 3: 1 .10 0.022
# 4: 1 .11 3.276
# 5: 1 .12 0.003
# ---
# 2060: 9 .5 0.000
# 2061: 9 .6 0.000
# 2062: 9 .7 0.000
# 2063: 9 .8 0.000
# 2064: 9 .9 0.000
但是,您还可以做更有趣的事情,例如“堆叠”多组列。例如,要使用“usr”、“sys”和“wai”变量的“堆叠”版本创建一个 list
,您可以这样做:
varsOfInterest <- c("usr", "sys", "wai")
Stacked(DT, id.vars = "rn", var.stubs = varsOfInterest,
sep = "var.stubs", keep.all = FALSE)
# $usr
# rn .time_1 usr
# 1: 1 0.027
# 2: 1 .1 8.068
# 3: 1 .10 0.022
# 4: 1 .11 3.276
# 5: 1 .12 0.003
# ---
# 2060: 9 .5 0.000
# 2061: 9 .6 0.000
# 2062: 9 .7 0.000
# 2063: 9 .8 0.000
# 2064: 9 .9 0.000
#
# $sys
# rn .time_1 sys
# 1: 1 0.010
# 2: 1 .1 5.000
# 3: 1 .10 0.009
# 4: 1 .11 2.559
# 5: 1 .12 0.001
# ---
# 2060: 9 .5 2.000
# 2061: 9 .6 0.000
# 2062: 9 .7 0.000
# 2063: 9 .8 0.000
# 2064: 9 .9 0.000
#
# $wai
# rn .time_1 wai
# 1: 1 0.027
# 2: 1 .1 0.036
# 3: 1 .10 0.003
# 4: 1 .11 0.038
# 5: 1 .12 0.003
# ---
# 2060: 9 .5 0.000
# 2061: 9 .6 0.000
# 2062: 9 .7 0.000
# 2063: 9 .8 0.000
# 2064: 9 .9 0.000
sep = "var.stubs"
参数基本上表示列名中没有真正的分隔符。对于您的数据集,这不是完全正确的,但它使您不必将每个集合的第一列重命名为“usr.0”之类的名称。如果数据已经是那种形式,那么您就可以使用 .
作为 sep
的值。