aboutsummaryrefslogtreecommitdiffstats
path: root/r/fitdistrplus-example.R
diff options
context:
space:
mode:
authorAaron LI <aaronly.me@gmail.com>2016-03-31 10:57:34 +0800
committerAaron LI <aaronly.me@gmail.com>2016-03-31 10:57:34 +0800
commitc9c896dea2ba43551c4e10bd49666105449e9bd7 (patch)
treee94b73f17b2d776c2acd4c9549657f500c3dc7ce /r/fitdistrplus-example.R
parent2b6cb9b655a53d43b32a8a211287c82f4f59999a (diff)
downloadatoolbox-c9c896dea2ba43551c4e10bd49666105449e9bd7.tar.bz2
add all scripts/tools
Diffstat (limited to 'r/fitdistrplus-example.R')
-rw-r--r--r/fitdistrplus-example.R54
1 files changed, 54 insertions, 0 deletions
diff --git a/r/fitdistrplus-example.R b/r/fitdistrplus-example.R
new file mode 100644
index 0000000..b5b1a57
--- /dev/null
+++ b/r/fitdistrplus-example.R
@@ -0,0 +1,54 @@
+n <- 50
+m <- 50
+set.seed(1)
+mu <- -0.4
+sig <- 0.12
+x <- matrix(data=rlnorm(n*m, mu, sig), nrow=m)
+
+library(fitdistrplus)
+## Fit a log-normal distribution to the 50 random data set
+f <- apply(x, 2, fitdist, "lnorm")
+
+## Plot the results
+for(i in 1:n)
+plot(f[[i]])
+
+## Save plot in an animated GIF-file
+library(animation)
+saveGIF({for(i in 1:n) plot(f[[i]])})
+
+apply((sapply(f, "[[", "estimate")),1, summary)
+# meanlog sdlog
+# Min. -0.4347 0.09876
+# 1st Qu. -0.4140 0.11480
+# Median -0.4010 0.12110
+# Mean -0.4011 0.12270
+# 3rd Qu. -0.3899 0.12950
+# Max. -0.3522 0.14780
+
+
+## How much variance can we expect in the mean and std?
+## Expeted mean
+ExpectedMean <- function(mu, sig) exp(mu+ sig^2/2)
+## Expected std
+ExpectedStd <- function(mu, sig) sqrt((exp(sig^2)-1)*exp(2*mu + sig^2))
+
+summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedMean(x[1], x[2])))
+# Min. 1st Qu. Median Mean 3rd Qu. Max.
+# 0.6529 0.6665 0.6747 0.6748 0.6819 0.7087
+summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedStd(x[1], x[2])))
+# Min. 1st Qu. Median Mean 3rd Qu. Max.
+# 0.06604 0.07880 0.08212 0.08316 0.08794 0.10170
+
+## Let's look at the goodness of fit statistics to get an
+## idea how much variance we can expect there:
+gof.ln <- lapply(f, gofstat)
+gof.test <- lapply(gof.ln, function(x) data.frame(x[c("chisqpvalue", "cvm", "ad", "ks")]))
+apply(do.call("rbind", gof.test), 2, summary)
+# chisqpvalue cvm ad ks
+# Min. 0.0002673 0.02117 0.1537 0.05438
+# 1st Qu. 0.1394000 0.03755 0.2708 0.07488
+# Median 0.3578000 0.04841 0.3216 0.08054
+# Mean 0.3814000 0.05489 0.3564 0.08431
+# 3rd Qu. 0.6409000 0.06913 0.4358 0.09436
+# Max. 0.9245000 0.13220 0.7395 0.12570 \ No newline at end of file