author     Aaron LI <aaronly.me@gmail.com>   2016-03-31 10:57:34 +0800
committer  Aaron LI <aaronly.me@gmail.com>   2016-03-31 10:57:34 +0800
commit     c9c896dea2ba43551c4e10bd49666105449e9bd7 (patch)
tree       e94b73f17b2d776c2acd4c9549657f500c3dc7ce /r
parent     2b6cb9b655a53d43b32a8a211287c82f4f59999a (diff)
download   atoolbox-c9c896dea2ba43551c4e10bd49666105449e9bd7.tar.bz2
add all scripts/tools
Diffstat (limited to 'r')
-rw-r--r--   r/chisq.R                   21
-rw-r--r--   r/fillpois.R               130
-rw-r--r--   r/fitdistrplus-example.R    54
-rw-r--r--   r/forceFieldTransform.R     46
-rw-r--r--   r/kldiv.R                  349
-rw-r--r--   r/lsos.R                    45
-rw-r--r--   r/scaleSaliency.R          246
7 files changed, 891 insertions(+), 0 deletions(-)
diff --git a/r/chisq.R b/r/chisq.R new file mode 100644 index 0000000..41f851f --- /dev/null +++ b/r/chisq.R @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# +# Calculate the chi-squared values between the given data and model values. +# + +calc.chisq <- function(value, error=NULL, model=NULL) { + if (is.data.frame(value)) { + df <- value + value <- df$value + error <- df$error + model <- df$model + } + chisq <- (value - model)^2 + if (! is.null(error)) { + weights <- error ^ (-2) + chisq <- chisq * weights + } + return(sum(chisq)) +} + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/fillpois.R b/r/fillpois.R new file mode 100644 index 0000000..be590f8 --- /dev/null +++ b/r/fillpois.R @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# +# Identify the abnormal pixels (with relatively larger values) in the +# given X-ray count image, and replace their values with random Poisson +# values whose parameter lambda is determined by the neighboring pixels. +# +# +# Aaron LI +# 2015/09/01 +# Updated: 2015/09/02 +# + +# Fill a vector of a row/column of X-ray image with Poisson distribution. +# +# The parameter lambda of the Poisson distribution is determined by the +# mean value of the left n (default: 3) elements. +# +# If the value of one element is greater than or equal to (>=) +# qpois(prob, lambda), then its value is replaced with rpois(1, lambda). +# +# Arguments: +# vec - input data vector +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a vector of the same length with abnormal values replaced +fill.pois.vec <- function(vec, n=3, prob=0.95) { + # Select the given index of element from the vector + # Arguments: + # bc - boundary condition: + # + "cyclic": vec[0, -1] := vec[length(vec), length(vec)-1] + # + "symmetric": vec[0, -1] := vec[1, 2] + elem <- function(vec, index, bc="cyclic") { + if (index <= 0) { + if (bc == "cyclic") { + index <- length(vec) + index + } else if (bc == "symmetric") { + index <- 1 - index + } else { + stop(paste("Invalid boundary condition:", bc, "\n")) + } + } + return(vec[index]) + } + # Calculate the mean value of left n elements for the given element. + mean.left <- function(vec, index, n=3, bc="cyclic") { + elements <- get(class(vec))(n) + for (i in 1:n) { + elements[i] <- elem(vec, index-i) + } + return(mean(elements)) + } + # + vec.filled <- vec + for (i in 1:length(vec)) { + lambda <- mean.left(vec, i, n) + if (vec[i] >= qpois(prob, lambda)) { + vec.filled[i] <- rpois(1, lambda) + } + } + return(vec.filled) +} + + +# Fill a matrix of the X-ray image with Poisson distribution by row or column. +# +# For more details, see 'fill.pois.vec()'. 
+# +# Arguments: +# mat - input image matrix +# byrow - where Poisson fill the matrix by row (default by column) +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a matrix of the same size with abnormal values replaced +fill.pois.mat <- function(mat, byrow=FALSE, n=3, prob=0.95) { + mat.filled <- mat + if (byrow) { + # process by row + rows <- nrow(mat) + for (r in 1:rows) { + vec <- mat[r, ] + vec.filled <- fill.pois.vec(vec, n, prob) + mat.filled[r, ] <- vec.filled + } + } else { + # process by column + cols <- ncol(mat) + for (c in 1:cols) { + vec <- mat[, c] + vec.filled <- fill.pois.vec(vec, n, prob) + mat.filled[, c] <- vec.filled + } + } + return(mat.filled) +} + + +# Identify the abnormal pixels (with relatively larger values) in the +# given X-ray count image, and replace their values with random Poisson +# values whose parameter lambda is determined by the neighboring pixels. +# +# The refilled image is the average of the two images, which are the +# original image processed with 'fill.pois.mat()' by row and column, +# respectively. +# +# TODO: to verify??? +# The two-step procedure is employed to avoid the grid-like pattern/structure +# in the refilled image. +# +# For more details, see 'fill.pois.vec()'. +# +# Arguments: +# img - input image (a matrix) +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a matrix of the same size with abnormal values replaced +fill.pois.img <- function(img, n=3, prob=0.95) { + img.fillbycol <- fill.pois.mat(img, byrow=FALSE, n=n, prob=prob) + img.fillbyrow <- fill.pois.mat(img, byrow=TRUE, n=n, prob=prob) + img.filled <- (img.fillbycol + img.fillbyrow) / 2 + return(img.filled) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/fitdistrplus-example.R b/r/fitdistrplus-example.R new file mode 100644 index 0000000..b5b1a57 --- /dev/null +++ b/r/fitdistrplus-example.R @@ -0,0 +1,54 @@ +n <- 50 +m <- 50 +set.seed(1) +mu <- -0.4 +sig <- 0.12 +x <- matrix(data=rlnorm(n*m, mu, sig), nrow=m) + +library(fitdistrplus) +## Fit a log-normal distribution to the 50 random data set +f <- apply(x, 2, fitdist, "lnorm") + +## Plot the results +for(i in 1:n) +plot(f[[i]]) + +## Save plot in an animated GIF-file +library(animation) +saveGIF({for(i in 1:n) plot(f[[i]])}) + +apply((sapply(f, "[[", "estimate")),1, summary) +# meanlog sdlog +# Min. -0.4347 0.09876 +# 1st Qu. -0.4140 0.11480 +# Median -0.4010 0.12110 +# Mean -0.4011 0.12270 +# 3rd Qu. -0.3899 0.12950 +# Max. -0.3522 0.14780 + + +## How much variance can we expect in the mean and std? +## Expeted mean +ExpectedMean <- function(mu, sig) exp(mu+ sig^2/2) +## Expected std +ExpectedStd <- function(mu, sig) sqrt((exp(sig^2)-1)*exp(2*mu + sig^2)) + +summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedMean(x[1], x[2]))) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 0.6529 0.6665 0.6747 0.6748 0.6819 0.7087 +summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedStd(x[1], x[2]))) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 0.06604 0.07880 0.08212 0.08316 0.08794 0.10170 + +## Let's look at the goodness of fit statistics to get an +## idea how much variance we can expect there: +gof.ln <- lapply(f, gofstat) +gof.test <- lapply(gof.ln, function(x) data.frame(x[c("chisqpvalue", "cvm", "ad", "ks")])) +apply(do.call("rbind", gof.test), 2, summary) +# chisqpvalue cvm ad ks +# Min. 0.0002673 0.02117 0.1537 0.05438 +# 1st Qu. 
0.1394000 0.03755 0.2708 0.07488
+# Median      0.3578000 0.04841 0.3216 0.08054
+# Mean        0.3814000 0.05489 0.3564 0.08431
+# 3rd Qu.     0.6409000 0.06913 0.4358 0.09436
+# Max.        0.9245000 0.13220 0.7395 0.12570
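The helper scripts added above (r/chisq.R and r/fillpois.R) carry no usage example, so a minimal sketch follows. It is illustrative only and not part of the commit: it assumes both files have been sourced, builds a synthetic 100x100 Poisson count image with a few injected hot pixels (lambda = 5 and the injected value 60 are arbitrary choices), refills the outliers with fill.pois.img(), and then scores the result against the constant model with calc.chisq().

## Illustrative sketch -- not part of the committed scripts.
source("r/chisq.R")
source("r/fillpois.R")
set.seed(42)
img <- matrix(rpois(100 * 100, lambda=5), nrow=100)  # synthetic count image
img[sample(length(img), 20)] <- 60                   # inject abnormal pixels
img.filled <- fill.pois.img(img, n=3, prob=0.95)     # refill the outliers
## chi-squared of the refilled image against the constant model (lambda = 5),
## weighted by approximate Poisson errors (floored at 1 to avoid zero division)
calc.chisq(value=as.vector(img.filled),
           error=sqrt(pmax(as.vector(img.filled), 1)),
           model=5)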
\ No newline at end of file diff --git a/r/forceFieldTransform.R b/r/forceFieldTransform.R new file mode 100644 index 0000000..92310c1 --- /dev/null +++ b/r/forceFieldTransform.R @@ -0,0 +1,46 @@ +# -*- coding: utf -*- +# +# Calculate the "force field transform" of the image, using the +# specified *cell* size. +# +# The image is padded with the mirrored boundary condition. +# +# NOTE:TODO: +# The transformation output strengths image is NOT normalized! +# +# +# Credit: +# [1] TODO: +# Hurley et al., 2002, 2005 +# +# +# Aaron LI +# 2015/08/28 +# + + +# The attractive force between two points on the image. +# NOTE: the coefficient is ignored +# +# Arguments: +# p0, p1 - (r, c, value), the row and column number of the point position, +# and the value of that point +# +# Return: +# the force vector (f_r, f_c): +# 'f_r': force along the row direction, positive goes downside +# 'f_c': force along the column direction, positive goes to the right +# Note that this is the force that 'p1' act on 'p0', and is directed +# to point 'p1'. +p2p.force <- function(p0, p1) { + r0 = p0[1] + c0 = p0[2] + r1 = p1[1] + c1 = p1[2] + f_r = p0[3]*p1[3] * (r1-r0) / ((r1-r0)^2 + (c1-c0)^2)^1.5 + f_c = p0[3]*p1[3] * (c1-c0) / ((r1-r0)^2 + (c1-c0)^2)^1.5 + return(c(f_r, f_c)) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/kldiv.R b/r/kldiv.R new file mode 100644 index 0000000..a767038 --- /dev/null +++ b/r/kldiv.R @@ -0,0 +1,349 @@ +# -*- coding: utf-8 -*- +# +# Kullback-Leibler or Jensen-Shannon divergence between two distributions +# +# The Kullback-Leibler divergence is given by: +# D_{KL}(P(x), Q(x)) = sum[ P(x) * log(P(x) / Q(x)) ] +# where P(x) is the underground true distribution, and Q(x) the approximation +# distribution. Thus KL divergence measures the information lost when Q is +# used to approximate P. +# +# The Jensen-Shannon divergence is given by: +# D_{JS}(P, Q) = 0.5 * D_{KL}(P, M) + 0.5 * D_{KL}(Q, M); M = (P+Q)/2 +# This is a symmetrised divergence, and is equal to 1/2 the so-called +# Jeffrey divergence. +# +# Credits: +# [1] Wikipedia - Kullback-Leibler divergence +# https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence +# [2] David Fass, KLDIV +# http://www.mathworks.com/matlabcentral/fileexchange/13089-kldiv/content//kldiv.m +# +# Aaron LI +# 2015/09/04 +# + + +# Calculate the entropy of the probability mass distribution. +# The zeros are ignored. +# +# Arguments: +# x - probability mass distribution +# +# Return: +# entropy in unit "bits" +# +calc.entropy <- function(x) { + x.gt0 <- x[x>0] + return(sum(-x.gt0 * log2(x.gt0))) +} + + +# Calculate the KL divergence of distribution P from Q, or the JS divergence +# between the P and Q distributions. +# +# TODO: +# * to add other methods to deal with zero probabilities: +# - add eps to p.m.f and renormalize +# - Byesian prior +# - smoothing +# +# Credits: +# [1] Calculate the Kullback-Leibler Divergence in practice? +# http://stats.stackexchange.com/questions/97938/calculate-the-kullback-leibler-divergence-in-practice +# [2] How to compute KL-divergence when PMF contains 0s? 
+# http://mathoverflow.net/questions/72668/how-to-compute-kl-divergence-when-pmf-contains-0s +# +# Arguments: +# p - probabilities representing the distribution P (underground true) +# q - probabilities representing the distribution Q (approximation) +# type - which type of divergence to be calculated +# + "kl": (default) Kullback-Leibler divergence +# + "klsym": symmetric variant of the Kullback-Leibler divergence, +# which given by (KL(p, q) + KL(q, p))/2 +# + "js": Jensen-Shannon divergence +# zeros - how to deal with the zeros in each distribution probabilities +# + "ignore": just ignore the data points with probability of zero +# +# Note that the vectors p and q must have the same length, and the +# sum of probabilities p and q must be 1 +/- 1e-5 +# +# Return: +# calculate divergence value in unit "bits" +# +kldiv <- function(p, q, type="kl", zeros="ignore") { + # check length of vectors + stopifnot(length(p) == length(q)) + # validate probabilities + eps_prob <- 1e-5 + stopifnot(abs(sum(p) - 1) <= eps_prob, abs(sum(q) - 1) <= eps_prob) + # how to deal with zero probabilities + if (zeros == "ignore") { + # just ignore the zeros in probabilities + nonzeros <- (p > 0) & (q > 0) + p <- p[nonzeros] + q <- q[nonzeros] + } else { + stop(paste("Unsupported parameter value zeros=", zeros, "\n", sep="")) + } + # check divergence type + if (type == "kl") { + # Kullback-Leibler divergence + div <- sum(p * (log2(p) - log2(q))) + } else if (type == "klsym") { + # symmetric variant KL divergence + div <- 0.5 * (sum(p * (log2(p) - log2(q))) + + sum(q * (log2(q) - log2(p)))) + } else if (type == "js") { + # Jensen-Shannon divergence + m <- (p + q) / 2 + div <- 0.5 * (sum(p * (log2(p) - log2(m))) + + sum(q * (log2(q) - log2(m)))) + } else { + stop(paste("Unsupported parameter value type=", type, "\n", sep="")) + } + return(div) +} + + +# Estimate the probability mass distribution for the observation data, +# using "density()". +# The range of output coordinates of points is set to be: +# from: min(x) - cut*bw +# to: max(x) + cut*bw +# And the probability mass distribution is normalized. +# +# Arguments: +# x - input observation data +# n - number of equally spaced points at which the probability mass is +# to be estimated. +# bw - bandwidth to be used +# kernel - the smoothing kernel +# from - coordinate of the left-most point +# to - coordinate of the right-most point +# cut - c(left, right). Number of bandwidths beyond the left and right +# extremes of the input data. +# This allows the estimated density to drop to approximately zero +# at the extremes. +# If "from" and "to" specified, then "cut" is ignored. 
+# +# Returns: +# list with following components: +# x - the coordinates of the points where probability mass estimated +# y - the estimated probability mass +# bw - bandwidth used +# kernel - kernel used +# n - sample size +# cut - left and right cut used +# from - coordinate of the left-most point used +# to - coordinate of the right-most point used +# +estimate.prob.mass <- function(x, bw="nrd0", kernel="gaussian", n=512, + from=NULL, to=NULL, cut=c(3,3)) { + data <- x[!is.na(x)] + # calculate the bandwidth + bw <- get(paste("bw.", bw, sep=""))(data) + # determine the estimation range + if (is.null(from)) { + from <- min(data) - cut[1] * bw + } + if (is.null(to)) { + to <- max(data) + cut[2] * bw + } + # estimate with "density()" + d <- density(data, bw=bw, kernel=kernel, n=n, from=from, to=to) + # renormalize the probability mass distribution + pmass <- d$y / sum(d$y) + prob.mass <- list(x=d$x, y=pmass, bw=bw, kernel=kernel, + n=n, from=from, to=to, cut=cut) + return(prob.mass) +} + + +# Estimate the probability mass distribution for the source and corresponding +# background data using 'estimate.prob.mass()'. +# +# The coordinates at which the probability masses are estimated are the same +# for the source and corresponding background probability mass distributions. +# Therefore we can calculate the KL divergence between these two distributions. +# +# Argument: +# srcdata - raw counts data drawn from the source region +# bkgdata - raw counts data drawn from the background region +# +# Return: +# data.frame with the following components: +# x - the coordinates of the points where probability mass estimated +# src - the estimated probability masses of the source data +# bkg - the estimated probability masses of the background data +# +pm.src.bkg <- function(srcdata, bkgdata) { + # compare the data ranges + if (max(srcdata) > max(bkgdata)) { + pm.src <- estimate.prob.mass(srcdata) + from <- pm.src$from + to <- pm.src$to + pm.bkg <- estimate.prob.mass(bkgdata, from=from, to=to) + } else { + pm.bkg <- estimate.prob.mass(bkgdata) + from <- pm.bkg$from + to <- pm.bkg$to + pm.src <- estimate.prob.mass(srcdata, from=from, to=to) + } + df <- data.frame(x=pm.src$x, src=pm.src$y, bkg=pm.bkg$y) + return(df) +} + + +# Calculate the entropies and KL/JS divergences of the source and background +# probability mass distribution group. 
+# +# Arguments: +# pmdf - data.frame of the probability mass distribution +# comp - components to be calculated +# + "entropy": entropy of the source and background +# + "kl": KL divergences from source to background and vice versa +# + "klsym": symmetric variant of KL divergence +# + "js": JS divergence +# +# Return: +# list with following components: +# entropy.src - entropy of the source distribution +# entropy.bkg - entropy of the background distribution +# kl.src2bkg - KL divergence from source to background +# kl.bkg2src - KL divergence from background to source +# klsym - symmetric variant KL divergence +# js - JS divergence +info.src.bkg <- function(pmdf, comp=c("entropy", "kl", "klsym", "js")) { + pm.src <- pmdf$src + pm.bkg <- pmdf$bkg + entropy.src <- NULL + entropy.bkg <- NULL + kl.src2bkg <- NULL + kl.bkg2src <- NULL + klsym <- NULL + js <- NULL + if ("entropy" %in% comp) { + entropy.src <- calc.entropy(pm.src) + entropy.bkg <- calc.entropy(pm.bkg) + } + if ("kl" %in% comp) { + kl.src2bkg <- kldiv(pm.src, pm.bkg, type="kl") + kl.bkg2src <- kldiv(pm.bkg, pm.src, type="kl") + } + if ("klsym" %in% comp) { + klsym <- kldiv(pm.src, pm.bkg, type="klsym") + } + if ("js" %in% comp) { + js <- kldiv(pm.src, pm.bkg, type="js") + } + return(list(entropy.src=entropy.src, entropy.bkg=entropy.bkg, + kl.src2bkg=kl.src2bkg, kl.bkg2src=kl.bkg2src, + klsym=klsym, js=js)) +} + + +# Calculate the entropies and KL/JS divergences of the source density +# histogram with respect to the corresponding background data which +# drawn from the estimated Poisson mass distribution. +# +# Arguments: +# src - raw counts data of the source region +# comp - components to be calculated +# + "entropy": entropy of the source and background +# + "kl": KL divergences from source to background and vice versa +# + "klsym": symmetric variant of KL divergence +# + "js": JS divergence +# +# Return: +# list with following components: +# entropy.src - entropy of the source distribution +# entropy.bkg - entropy of the background distribution +# kl.src2bkg - KL divergence from source to background +# kl.bkg2src - KL divergence from background to source +# klsym - symmetric variant KL divergence +# js - JS divergence +# +info.src.pois <- function(src, comp=c("entropy", "kl", "klsym", "js")) { + # make the density histogram of the source counts data + hist.src <- hist(src, breaks=(min(src):(max(src)+1)-0.5), plot=FALSE) + x <- hist.src$mids + pm.src <- hist.src$density + # calculate the corresponding theoretical Poisson density/mass distribution + # as the estimated background + lambda <- mean(src) + pm.pois <- dpois(x, lambda) + pm.pois <- pm.pois / sum(pm.pois) + # calculate the entropy, KL/JS divergences + entropy.src <- NULL + entropy.bkg <- NULL + kl.src2bkg <- NULL + kl.bkg2src <- NULL + klsym <- NULL + js <- NULL + if ("entropy" %in% comp) { + entropy.src <- calc.entropy(pm.src) + entropy.bkg <- calc.entropy(pm.pois) + } + if ("kl" %in% comp) { + kl.src2bkg <- kldiv(pm.src, pm.pois, type="kl") + kl.bkg2src <- kldiv(pm.pois, pm.src, type="kl") + } + if ("klsym" %in% comp) { + klsym <- kldiv(pm.src, pm.pois, type="klsym") + } + if ("js" %in% comp) { + js <- kldiv(pm.src, pm.pois, type="js") + } + return(list(entropy.src=entropy.src, entropy.bkg=entropy.bkg, + kl.src2bkg=kl.src2bkg, kl.bkg2src=kl.bkg2src, + klsym=klsym, js=js)) +} + + +# Calculate the information (e.g., entropy, divergences) for each group of +# region data. 
+# If the background data are not provided, then the background is estimated +# with a Poisson density/mass distribution. +info.reglist <- function(srcdatalist, bkgdatalist=NULL) { + if (is.null(bkgdatalist)) { + infofunc <- "info.src.pois" + } else { + infofunc <- "info.src.bkg" + stopifnot(length(srcdatalist) == length(bkgdatalist)) + } + l <- length(srcdatalist) + infodf <- data.frame(entropy.src=numeric(l), entropy.bkg=numeric(l), + kl.src2bkg=numeric(l), kl.bkg2src=numeric(l), + klsym=numeric(l), js=numeric(l)) + for (i in 1:length(srcdatalist)) { + #cat(i, "\n") + if (is.null(bkgdatalist)) { + if (sum(srcdatalist[[i]]) == 0) { + # srcdata all zeros + cat(i, ": WARNING: srcdata are all zeros!\n") + info <- list(entropy.src=NA, entropy.bkg=NA, + kl.src2bkg=NA, kl.bkg2src=NA, + klsym=NA, js=NA) + } else { + info <- get(infofunc)(srcdatalist[[i]]) + } + } else { + if (sum(srcdatalist[[i]]) == 0 || sum(bkgdatalist[[i]]) == 0) { + # srcdata / bkgdata all zeros + cat(i, ": WARNING: srcdata/bkgdata are all zeros!\n") + info <- list(entropy.src=NA, entropy.bkg=NA, + kl.src2bkg=NA, kl.bkg2src=NA, + klsym=NA, js=NA) + } else { + pmdf <- pm.src.bkg(srcdatalist[[i]], bkgdatalist[[i]]) + info <- get(infofunc)(pmdf) + } + } + infodf[i, ] <- info + } + return(infodf) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/lsos.R b/r/lsos.R new file mode 100644 index 0000000..16a3e7f --- /dev/null +++ b/r/lsos.R @@ -0,0 +1,45 @@ +# -*- encoding: utf-8 -*- + +# Tricks to manage the available memory in an R session +# http://stackoverflow.com/q/1358003/4856091 + +# improved list of objects +.ls.objects <- function(pos=1, pattern, order.by, + decreasing=FALSE, pretty.size=FALSE, + head=FALSE, n=10) { + napply <- function(names, fn) { + sapply(names, function(x) fn(get(x, pos=pos))) + } + names <- ls(pos=pos, pattern=pattern) + obj.class <- napply(names, function(x) as.character(class(x))[1]) + obj.mode <- napply(names, mode) + obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class) + obj.size.bytes <- napply(names, object.size) + if (pretty.size) { + obj.size <- napply(names, function(x) { + format(object.size(x), units="auto") + }) + } else { + obj.size <- obj.size.bytes + } + obj.dim <- t(napply(names, function(x) as.numeric(dim(x))[1:2])) + vec <- is.na(obj.dim)[, 1] & (obj.type != "function") + obj.dim[vec, 1] <- napply(names, length)[vec] + out <- data.frame(obj.type, obj.size, obj.dim) + names(out) <- c("Type", "Size", "Rows", "Columns") + if (! missing(order.by)) + if (order.by == "Size") { + out <- out[order(obj.size.bytes, decreasing=decreasing), ] + } else { + out <- out[order(out[[order.by]], decreasing=decreasing), ] + } + if (head) + out <- head(out, n) + out +} +# shorthand +lsobjs <- function(..., n=10) { + .ls.objects(..., order.by="Size", decreasing=TRUE, + pretty.size=TRUE, head=TRUE, n=n) +} + diff --git a/r/scaleSaliency.R b/r/scaleSaliency.R new file mode 100644 index 0000000..80172bc --- /dev/null +++ b/r/scaleSaliency.R @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# +# Scale Saliency algorithm +# +# Reference: +# [1] T. Kadir & M. Brady, Saliency, Scale and Image Description. +# 2001, International Journal of Computer Vision, 45(2), 83-105 +# +# Aaron LI +# 2015/07/29 +# + + +# Calculate Shannon entropy from histogram of probability densities. 
+# Arguments: +# phist - histogram of probability density +# Return: +# entropy value +calcShannonEntropy <- function(phist) { + # Helper function to calculate the entropy + # Arguments: + # p - probability density of each bin + # Return: + # p * log(p) if p > 0, otherwise 0 + plogp <- function(p) { + if (p < 1e-10) { + return(0.0) + } else { + return(p * log(p)) + } + } + # calculate the entropy + entropy <- -sum(sapply(phist, plogp)) + return(entropy) +} + + +# Generate the template circular regions for each scale. +# Arguments: +# scale_min - minimum scale (radius of circle) +# scale_max - maximum scale +# Return: +# list of matrix which represent the regions of interest (with value of TRUE) +circleROI <- function(scale_min, scale_max) { + rows <- 2 * scale_max + 1 + cols <- rows + rc <- (rows + 1) / 2 # central row + cc <- (cols + 1) / 2 # central col + roi <- list() + for (s in scale_min:scale_max) { + radius2 <- s^2 + m <- matrix(0, nrow=rows, ncol=cols) + roi[[paste("scale", s, sep="")]] <- + ifelse(((row(m)-rc)^2 + (col(m)-cc)^2) <= radius2, + TRUE, FALSE) + } + return(roi) +} + + +# Calculate the scale saliencies for the 1D case: scalar image +# Arguments: +# img - input *scalar* image +# scale_min - minimum scale (pixels of radius of circle) +# scale_max - maximum scale (NOTE: scale_max >= scale_min+2) +# nbins - how many bins used for histogram +# progressbar - whether to display the progress bar +# Return: +# 6-column data.frame contains the scale saliencies results +calcScaleSaliency1D <- function(img, scale_min, scale_max, nbins, + progressbar=TRUE) { + # check scale range first: must have at least 3 scales + stopifnot(scale_max >= scale_min+2) + # get number of rows and columns + rows <- nrow(img) + cols <- ncol(img) + # determine the saliency calculation region of the image + # FIXME: how to deal with the boundaries??? + row_begin <- scale_max + 1 + col_begin <- scale_max + 1 + row_end <- rows - scale_max + col_end <- cols - scale_max + # templates of regions for each scale + roi <- circleROI(scale_min, scale_max) + # R data frame to store the saliency results + scaleSaliency <- data.frame(row=numeric(0), col=numeric(0), + scale=numeric(0), entropy=numeric(0), + disimilarity=numeric(0), saliency=numeric(0)) + # determine the breakpoints for histogram + hist_breaks <- (0:nbins) * (max(img) - min(img))/nbins + min(img) + if (progressbar) { + # progress bar + pb <- txtProgressBar(min=row_begin, max=row_end, style=3) + } + for (ri in row_begin:row_end) { + if (progressbar) { + # update progress bar + setTxtProgressBar(pb, ri) + } + for (ci in col_begin:col_end) { + # filter out the required size of image block, which is + # used to calculate its histogram, entropy, etc. + imgROI <- img[(ri-scale_max):(ri+scale_max), + (ci-scale_max):(ci+scale_max)] + # vectors to store entropies and distances + entropy <- numeric(scale_max-scale_min+1) + distance <- numeric(scale_max-scale_min+1) + # initial probability density for scale of 's-1' + scaleHistPr0 <- rep(0, nbins) + for (s in scale_min:scale_max) { + scaleROI <- roi[[paste("scale", s, sep="")]] + # NOTE: do not use 'breaks=nbins', since the number is a + # suggestion only and breakpoints will be set to 'prtty' + # values in this case. + scaleHist <- hist(imgROI[scaleROI], + breaks=hist_breaks, plot=FALSE) + scaleHistPr <- scaleHist$counts / sum(scaleHist$counts) + # calculate Shannon entropy + entropy[s-scale_min+1] <- calcShannonEntropy(scaleHistPr) + # FIXME: calculate distance of scales??? 
+ distance[s-scale_min+1] <- sum(abs(scaleHistPr-scaleHistPr0)) + # save the probability density of current scale 's' + scaleHistPr0 <- scaleHistPr + } + # smooth the 'distance' vector to reduce the impacts of noise + distance1 <- c(distance[1], distance[1:(length(distance)-1)]) + distance2 <- c(distance[2:length(distance)], + distance[length(distance)]) + distance <- (distance1 + distance + distance2) / 3 + # find the peaks of entropy, and the corresponding scales + peakScale <- c(FALSE, + ((entropy[2:(length(entropy)-1)] > + entropy[1:(length(entropy)-2)]) & + (entropy[2:(length(entropy)-1)] > + entropy[3:length(entropy)])), + FALSE) + #cat("peakScale:", peakScale, "\n") + # calculate the inter-scale saliencies for each entropy peaks + for (s in (scale_min:scale_max)[peakScale]) { + scaleNorm <- s*s / (2*s - 1) + scaleEntropy <- entropy[s-scale_min+1] + disimilarity <- scaleNorm * distance[s-scale_min+1] + saliency <- scaleEntropy * disimilarity + scaleSaliency[nrow(scaleSaliency)+1, ] <- list(ri, ci, s, + scaleEntropy, + disimilarity, + saliency) + } + } + } + if (progressbar) { + # close progress bar + close(pb) + } + return(scaleSaliency) +} + + +# Simple greedy clustering algorithm to filter out salient regions. +# Arguments: +# ssaliency - saliency results from 'calcScaleSaliency*' +# ssaliency_th - inter-scale saliency threshold +# disimilarity_th - disimilarity threshold +# Return: +# clustered & filtered saliency regions +greedyCluster <- function(ssaliency, ssaliency_th, disimilarity_th) { + # filter by global saliency & inter-scale saliency threshold + ssaliency <- ssaliency[((ssaliency$saliency > ssaliency_th) & + (ssaliency$disimilarity > disimilarity_th)), ] + # sort in descending inter-scale saliency + ssaliency <- ssaliency[order(-ssaliency$saliency), ] + # cluster salienct points + clusteredSaliency <- ssaliency[NULL, ] + while (nrow(ssaliency) > 0) { + ss <- ssaliency[1, ] + clusteredSaliency[nrow(clusteredSaliency)+1, ] <- ss + distance2 <- (ssaliency$row - ss$row)^2 + (ssaliency$col - ss$col)^2 + # filter out the points inside the current salient circle + ssaliency <- ssaliency[(distance2 > ss$scale^2), ] + } + return(clusteredSaliency) +} + + +# Plot the image and salient regions with ggplot2 +# Arguments: +# img - input image +# saliency - saliency restults by clusteredSaliency() +plotSalientReg <- function(img, saliency) { + require(reshape2) + require(ggplot2) + plotCircle <- function(xc, yc, radius) { + theta <- seq(0, 2*pi, length.out=100) + gcircle <- annotate("path", + x=xc+radius*cos(theta), + y=yc+radius*sin(theta), + colour="green") + return(gcircle) + } + # plot the image + gp <- ggplot(melt(img), aes(Var2, -Var1, fill=value)) + geom_raster() + # add circles + for (i in 1:nrow(saliency)) { + ss <- saliency[i, ] + gcircle <- plotCircle(ss$col, -ss$row, ss$scale) + gp <- gp + gcircle + } + return(gp) +} + + +# Convert the scale saliency information to DS9 regions. +# +# NOTE: +# However, the rows and columns of the FITS matrix in R correspond +# to the X and Y axes in DS9, which is *swapped*. +# Thus the region width and height correspond to the row range and +# column range, respectively. +# +# Arguments: +# saliency - saliency restults by clusteredSaliency() +# Return: +# vector of DS9 region strings +saliency2region <- function(saliency) { + regions <- with(saliency, + paste("circle(", row, ",", col, ",", scale, ")", + sep="")) + return(regions) +} + + +# Write DS9 region to file with appropriate header information. 
+#
+# Arguments:
+#   filename - output region file
+#   region   - vector/list of region strings
+save.region <- function(filename, region) {
+    rf <- file(filename, "w")
+    region.hdr <- c("# Region file format: DS9 version 4.1", "image")
+    writeLines(region.hdr, rf)
+    writeLines(region, rf)
+    close(rf)
+}
+
+
+# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: #
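None of the analysis scripts in this commit include a driver, so an end-to-end sketch is appended here for illustration only. Everything in it is an assumption rather than part of the commit: the synthetic inputs, the scale range 3-8 with 16 histogram bins, and the quantile-based thresholds passed to greedyCluster().

## Illustrative sketch -- not part of the committed scripts.
source("r/kldiv.R")
source("r/scaleSaliency.R")
set.seed(1)

## Entropy and KL/JS divergences of synthetic source counts against the
## Poisson background model estimated by info.src.pois()
src <- rpois(500, lambda=8)
info.src.pois(src, comp=c("entropy", "kl", "js"))

## Scale saliency on a synthetic image containing one bright blob
img <- matrix(rnorm(64 * 64), nrow=64)
img[20:26, 30:36] <- img[20:26, 30:36] + 3
ss <- calcScaleSaliency1D(img, scale_min=3, scale_max=8, nbins=16,
                          progressbar=FALSE)
## Keep well-separated, highly salient peaks (thresholds are arbitrary and
## assume at least one entropy peak was found)
clustered <- greedyCluster(ss,
                           ssaliency_th=quantile(ss$saliency, 0.95),
                           disimilarity_th=quantile(ss$disimilarity, 0.50))
## Export the clustered regions to a DS9 region file
save.region("salient.reg", saliency2region(clustered))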