StackOverflow2013

Note that there are some explanatory texts on larger screens.

plurals

PO
text
Body
copied!<p>Thomas's answer is much better than any of the three approaches I tried. Here I compare the four approaches with <code>microbenchmark</code>. I have not yet tried Thomas's answer with the actual data. My original nested for-loops approach is still running after 22 hours.</p> <pre><code>Unit: milliseconds expr min lq median uq max neval fn.1(x, weights) 98.69133 99.47574 100.5313 101.7315 108.8757 20 fn.2(x, weights) 755.51583 758.12175 762.3775 776.0558 801.9615 20 fn.3(x, weights) 564.21423 567.98822 568.5322 571.0975 575.1809 20 fn.4(x, weights) 367.05862 370.52657 371.7439 373.7367 395.0423 20 ######################################################################################### # create data set.seed(1234) n.rows <- 40 n.cols <- 40 n.sample <- n.rows * n.cols x <- sample(20, n.sample, replace=TRUE) x.NA <- sample(n.rows*n.cols, 10*(n.sample / n.rows), replace=FALSE) x[x.NA] <- NA x <- as.data.frame(matrix(x, nrow = n.rows)) weights <- sample(4, n.sample, replace=TRUE) weights <- as.data.frame(matrix(weights, nrow = n.rows)) weights ######################################################################################### # Thomas's function fn.1 <- function(x, weights){ newx <- reshape(x, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names=c("v1", "v2")) newwt <- reshape(weights, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names=c("w1", "w2")) condwtmean <- function(x,y,wtx,wty){ if(xor(is.na(x),is.na(y))){ if(is.na(x)) x <- (y / wty) * wtx # replacement function if(is.na(y)) y <- (x / wtx) * wty # replacement function return(weighted.mean(c(x,y),c(wtx,wty))) } else if(!is.na(x) & !is.na(y)) return(weighted.mean(c(x,y),c(wtx,wty))) else return(NA) } newx$wtmean <- mapply(condwtmean, newx$v1, newx$v2, newwt$w1, newwt$w2) newx2 <- reshape(newx[,c(1,4:5)], v.names = "wtmean", timevar = "time", direction = "wide") newx2 <- newx2[,2:(n.cols/2+1)] names(newx2) <- paste('X', 1:(n.cols/2), sep = "") return(newx2) } fn.1.output <- fn.1(x, weights) ######################################################################################### # nested for-loops with 4 if statements fn.2 <- function(x, weights){ for(i in 1: (ncol(x)/2)) { for(j in 1: nrow(x)) { if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = (x[j,(1 + ((i-1)*2 + 1))] / weights[j,(1 + ((i-1)*2 + 1))]) * weights[j,(1 + (i-1)*2 + 0)] if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = (x[j,(1 + ((i-1)*2 + 0))] / weights[j,(1 + ((i-1)*2 + 0))]) * weights[j,(1 + (i-1)*2 + 1)] if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = NA if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = NA } } x.weights = x * weights numerator <- sapply(seq(1,ncol(x.weights),2), function(i) { apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T) }) denominator <- sapply(seq(1,ncol(weights),2), function(i) { apply(weights[,c(i, i+1)], 1, sum, na.rm=T) }) weighted.x <- numerator/denominator for(i in 1: (ncol(x)/2)) { for(j in 1: nrow(x) ) { if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA } } return(weighted.x) } fn.2.output <- fn.2(x, weights) fn.2.output <- as.data.frame(fn.2.output) names(fn.2.output) <- paste('X', 1:(n.cols/2), sep = "") ######################################################################################### # nested for-loops with 2 if statements fn.3 <- function(x, weights){ for(i in 1: (ncol(x)/2)) { for(j in 1: nrow(x)) { if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 0)] = (x[j,(1 + ((i-1)*2 + 1))] / weights[j,(1 + ((i-1)*2 + 1))]) * weights[j,(1 + (i-1)*2 + 0)] if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) x[j,(1 + (i-1)*2 + 1)] = (x[j,(1 + ((i-1)*2 + 0))] / weights[j,(1 + ((i-1)*2 + 0))]) * weights[j,(1 + (i-1)*2 + 1)] } } x.weights = x * weights numerator <- sapply(seq(1,ncol(x.weights),2), function(i) { apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T) }) denominator <- sapply(seq(1,ncol(weights),2), function(i) { apply(weights[,c(i, i+1)], 1, sum, na.rm=T) }) weighted.x <- numerator/denominator for(i in 1: (ncol(x)/2)) { for(j in 1: nrow(x) ) { if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA } } return(weighted.x) } fn.3.output <- fn.3(x, weights) fn.3.output <- as.data.frame(fn.3.output) names(fn.3.output) <- paste('X', 1:(n.cols/2), sep = "") ######################################################################################### # my reshape solution fn.4 <- function(x, weights){ new.x <- reshape(x , direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names = c("v1", "v2")) wt <- reshape(weights, direction="long", varying = list(seq(1,(n.cols-1),2), seq(2,n.cols,2)), v.names = c("w1", "w2")) new.x$v1 <- ifelse(is.na(new.x$v1), (new.x$v2 / wt$w2) * wt$w1, new.x$v1) new.x$v2 <- ifelse(is.na(new.x$v2), (new.x$v1 / wt$w1) * wt$w2, new.x$v2) x2 <- reshape(new.x, direction="wide", varying = list(seq(1,3,2), seq(2,4,2)), v.names = c("v1", "v2")) x <- x2[,2:(n.cols+1)] x.weights = x * weights numerator <- sapply(seq(1,ncol(x.weights),2), function(i) { apply(x.weights[,c(i, i+1)], 1, sum, na.rm=T) }) denominator <- sapply(seq(1,ncol(weights),2), function(i) { apply(weights[,c(i, i+1)], 1, sum, na.rm=T) }) weighted.x <- numerator/denominator for(i in 1: (ncol(x)/2)) { for(j in 1: nrow(x) ) { if( is.na(x[j,(1 + (i-1)*2)]) & !is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if(!is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = sum(c(x[j,(1 + ((i-1)*2))], x[j,(1 + ((i-1)*2 + 1))]), na.rm = TRUE) if( is.na(x[j,(1 + (i-1)*2)]) & is.na(x[j,(1 + (i-1)*2 + 1)])) weighted.x[j,i] = NA } } return(weighted.x) } fn.4.output <- fn.4(x, weights) fn.4.output <- as.data.frame(fn.4.output) names(fn.4.output) <- paste('X', 1:(n.cols/2), sep = "") ######################################################################################### rownames(fn.1.output) <- NULL rownames(fn.2.output) <- NULL rownames(fn.3.output) <- NULL rownames(fn.4.output) <- NULL all.equal(fn.1.output, fn.2.output) all.equal(fn.1.output, fn.3.output) all.equal(fn.1.output, fn.4.output) all.equal(fn.2.output, fn.3.output) all.equal(fn.2.output, fn.4.output) all.equal(fn.3.output, fn.4.output) library(microbenchmark) microbenchmark(fn.1(x, weights), fn.2(x, weights), fn.3(x, weights), fn.4(x, weights), times=20) ######################################################################################### </code></pre>

Querying!

Guidance

An individual column

Larger individual text columns get their own page to allow for proper reading.

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload