My goal is to apply a function func1
to each row of the matrix input
and then return a new one resulting from the transformation.
The code works but when the data frame contains more than 1 million rows, it become extremely slow. How can I optimize my code? I start learning programming and I am not familiar with strategies to speed up R code.
The functions performs 2 main steps:
- Find the locations of all neighboring cells that are located in the extent
PR
from a focal cell, extractraster
's values at these locations and calculate probability matrix - Find the maximum value in the matrix and the new cell corresponding with the maximum value.
Here's the data frame and raster:
library(dplyr)
library(raster)
library(psych)
set.seed(1234)
n = 10000
input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
r <- raster(extent(0, 10, 0, 10), res = 1)
values(r) <- sample(1:1000, size = 10*10, replace = T)
## plot(r)
Here's my code to apply the function to each row in the matrix:
system.time(
test <- input %>%
split(1:nrow(input)) %>%
map(~ func1(.x, 2, 2, "test_1")) %>%
do.call("rbind", .))
Here's the function:
func1 <- function(dataC, PR, DB, MT){
## Retrieve the coordinates x and y of the current cell
c1 <- dataC[[1]]
c2 <- dataC[[2]]
## Retrieve the coordinates x and y of the previous cell
c3 <- dataC[[3]]
c4 <- dataC[[4]]
## Initialize the coordinates x and y of the new cell
newc1 <- -999
newc2 <- -999
if(MT=="test_1"){
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
V1 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
V2 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
V3 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
V4 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
V5 <- 0 ## cell at middle corner
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
V6 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
V7 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
V8 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
V9 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
} else if(MT=="test_2"){
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
V1 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
V2 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
V3 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
V4 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
V5 <- 0 ## cells at middle corner
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
V6 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
V7 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
V8 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
## Extract the raster values with coordinates in matC
matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
V9 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
}
## Build the matrix of cell selection
tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5,
(1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
## Test if the new cell is the previous cell
if(c3 == newc1 && c4 == newc2){
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
## Find the maximum value in the matrix
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## print(choiceC)
## If there are several maximum values
if(nrow(choiceC) > 1){
choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
}
## Find the new cell relative to the current cell
if(choiceC[1]==1 & choiceC[2]==1){ ## cell at the upper-left corner
newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==2){ ## cell at the upper-middle corner
newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
} else if(choiceC[1]==1 & choiceC[2]==3){ ## cell at the upper-right corner
newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==1){ ## cell at the left corner
newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==2 & choiceC[2]==3){ ## cell at the right corner
newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==1){ ## cell at the bottom-left corner
newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==2){ ## cell at the bottom-middle corner
newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
} else if(choiceC[1]==3 & choiceC[2]==3){ ## cell at the bottom-right corner
newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
}
newc1 <- newC[[1]]
newc2 <- newC[[2]]
}
return(newC)
}
Here's the elapsed time when n = 10000. Ideally, I would like to reduce the time required at < 1 min.
user system elapsed
108.96 0.01 109.81
1 Answer 1
Did dome upgrades, but only for 'test_1'
case, you can update 'test2'
case similarly.
For me this function run in 13.54 sek vs 26.16 sek for your original code.
func1 <- function(dataC, PR, DB, MT){
## Retrieve the coordinates x and y of the current cell
c1 <- dataC[[1]]
c2 <- dataC[[2]]
## Retrieve the coordinates x and y of the previous cell
c3 <- dataC[[3]]
c4 <- dataC[[4]]
## Initialize the coordinates x and y of the new cell
newc1 <- -999
newc2 <- -999
a1 <- c((c1 - PR), (c1 - 1))
a2 <- c((c2 - PR), (c2 - 1))
a3 <- c((c2 - 1), (c2 + 1))
a4 <- c((c2 + 1), (c2 + PR))
a5 <- c((c1 - 1), (c1 + 1))
a6 <- c((c1 + 1), (c1 + PR))
xx <- c(a1, a2, a3, a4, a5, a6)
xx <- seq(min(xx), max(xx))
gg <- expand.grid(xx, xx, KEEP.OUT.ATTRS = F)
gg <- as.matrix(gg)
gg1 <- gg[, 1]
gg2 <- gg[, 2]
ff2 <- function(matC) {
y1 <- raster::extract(r, matC)
mean(y1, na.rm = T)
}
cgrid <- function(x, y) {
gg[gg1 >= x[1] & gg1 <= x[2] & gg2 >= y[1] & gg2 <= y[2], ]
}
if (MT == "test_1") {
## cells at upper-left corner
V1 <- ff2(cgrid(x = a1, y = a2)) * sqrt(2) * DB
## cells at upper-middle corner
V2 <- ff2(cgrid(x = a1, y = a3)) * DB
## cells at upper-right corner
V3 <- ff2(cgrid(x = a1, y = a4)) * sqrt(2) * DB
## cells at left corner
V4 <- ff2(cgrid(x = a5, y = a2)) * DB
V5 <- 0 ## cell at middle corner
## cells at right corner
V6 <- ff2(cgrid(x = a5, y = a4)) * DB
## cells at bottom-left corner
V7 <- ff2(cgrid(x = a6, y = a2)) * sqrt(2) * DB
## cells at bottom-middle corner
V8 <- ff2(cgrid(x = a6, y = a3)) * DB
## cells at bottom-right corner
V9 <- ff2(cgrid(x = a6, y = a4) ) * sqrt(2) * DB
}
## Build the matrix of cell selection
V <- c(V1, V2, V3, V4, V5, V6, V7, V8, V9)
tot <- sum(1/V[-5], na.rm = TRUE)
mat_V <- matrix((1/V)/tot, nrow = 3, ncol = 3, byrow = TRUE)
mat_V[5] <- V5
while ((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)) {
## Test if the new cell is the previous cell
if (c3 == newc1 && c4 == newc2) {
mat_V[choiceC[1], choiceC[2]] <- NaN
## print(mat_V)
}
## Find the maximum value in the matrix
choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
## If there are several maximum values
if (nrow(choiceC) > 1) choiceC <- choiceC[sample.int(nrow(choiceC), 1L), ]
## Find the new cell relative to the current cell
newC <- c(x = c1 + (choiceC[1] - 2), y = c2 + (choiceC[2] - 2))
newC <- matrix(newC, ncol = 2)
newc1 <- newC[[1]]
newc2 <- newC[[2]]
}
return(newC)
}
-
\$\begingroup\$ Thank you very much ! The function runs for 99.16 s vs 110 s from my code. \$\endgroup\$Pierre– Pierre2018年11月26日 15:22:32 +00:00Commented Nov 26, 2018 at 15:22
r
need to beraster
? can no t we use simple matrix? \$\endgroup\$r
can be a matrix. \$\endgroup\$