3
\$\begingroup\$

My goal is to apply a function func1 to each row of the matrix input and then return a new one resulting from the transformation.

The code works but when the data frame contains more than 1 million rows, it become extremely slow. How can I optimize my code? I start learning programming and I am not familiar with strategies to speed up R code.

The functions performs 2 main steps:

  1. Find the locations of all neighboring cells that are located in the extent PR from a focal cell, extract raster's values at these locations and calculate probability matrix
  2. Find the maximum value in the matrix and the new cell corresponding with the maximum value.

Here's the data frame and raster:

library(dplyr)
library(raster)
library(psych)
 set.seed(1234) 
 n = 10000
 input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
 r <- raster(extent(0, 10, 0, 10), res = 1)
 values(r) <- sample(1:1000, size = 10*10, replace = T)
 ## plot(r)

Here's my code to apply the function to each row in the matrix:

system.time(
 test <- input %>% 
 split(1:nrow(input)) %>% 
 map(~ func1(.x, 2, 2, "test_1")) %>% 
 do.call("rbind", .))

Here's the function:

func1 <- function(dataC, PR, DB, MT){
 ## Retrieve the coordinates x and y of the current cell
 c1 <- dataC[[1]]
 c2 <- dataC[[2]]
 ## Retrieve the coordinates x and y of the previous cell
 c3 <- dataC[[3]]
 c4 <- dataC[[4]]
 ## Initialize the coordinates x and y of the new cell
 newc1 <- -999
 newc2 <- -999
 if(MT=="test_1"){
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
 V1 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
 V2 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
 V3 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
 V4 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 V5 <- 0 ## cell at middle corner
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
 V6 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
 V7 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
 V8 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
 V9 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 } else if(MT=="test_2"){
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
 V1 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
 V2 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
 V3 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
 V4 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 V5 <- 0 ## cells at middle corner
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
 V6 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
 V7 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
 V8 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
 V9 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 }
 ## Build the matrix of cell selection
 tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
 mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5, 
 (1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
 while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
 ## Test if the new cell is the previous cell
 if(c3 == newc1 && c4 == newc2){
 mat_V[choiceC[1], choiceC[2]] <- NaN
 ## print(mat_V)
 }
 ## Find the maximum value in the matrix
 choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
 ## print(choiceC)
 ## If there are several maximum values
 if(nrow(choiceC) > 1){
 choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
 }
 ## Find the new cell relative to the current cell 
 if(choiceC[1]==1 & choiceC[2]==1){ ## cell at the upper-left corner
 newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==1 & choiceC[2]==2){ ## cell at the upper-middle corner
 newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
 } else if(choiceC[1]==1 & choiceC[2]==3){ ## cell at the upper-right corner
 newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
 } else if(choiceC[1]==2 & choiceC[2]==1){ ## cell at the left corner
 newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==2 & choiceC[2]==3){ ## cell at the right corner
 newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==1){ ## cell at the bottom-left corner
 newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==2){ ## cell at the bottom-middle corner
 newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==3){ ## cell at the bottom-right corner
 newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
 }
 newc1 <- newC[[1]]
 newc2 <- newC[[2]]
 }
 return(newC)
 } 

Here's the elapsed time when n = 10000. Ideally, I would like to reduce the time required at < 1 min.

user system elapsed 
108.96 0.01 109.81 
asked Nov 23, 2018 at 20:25
\$\endgroup\$
2
  • \$\begingroup\$ do the r need to be raster? can no t we use simple matrix? \$\endgroup\$ Commented Nov 26, 2018 at 12:28
  • \$\begingroup\$ Yes, r can be a matrix. \$\endgroup\$ Commented Nov 26, 2018 at 15:18

1 Answer 1

2
\$\begingroup\$

Did dome upgrades, but only for 'test_1' case, you can update 'test2' case similarly. For me this function run in 13.54 sek vs 26.16 sek for your original code.

func1 <- function(dataC, PR, DB, MT){
 ## Retrieve the coordinates x and y of the current cell
 c1 <- dataC[[1]]
 c2 <- dataC[[2]]
 ## Retrieve the coordinates x and y of the previous cell
 c3 <- dataC[[3]]
 c4 <- dataC[[4]]
 ## Initialize the coordinates x and y of the new cell
 newc1 <- -999
 newc2 <- -999
 a1 <- c((c1 - PR), (c1 - 1))
 a2 <- c((c2 - PR), (c2 - 1))
 a3 <- c((c2 - 1), (c2 + 1))
 a4 <- c((c2 + 1), (c2 + PR))
 a5 <- c((c1 - 1), (c1 + 1))
 a6 <- c((c1 + 1), (c1 + PR))
 xx <- c(a1, a2, a3, a4, a5, a6)
 xx <- seq(min(xx), max(xx))
 gg <- expand.grid(xx, xx, KEEP.OUT.ATTRS = F)
 gg <- as.matrix(gg)
 gg1 <- gg[, 1]
 gg2 <- gg[, 2]
 ff2 <- function(matC) {
 y1 <- raster::extract(r, matC)
 mean(y1, na.rm = T)
 }
 cgrid <- function(x, y) {
 gg[gg1 >= x[1] & gg1 <= x[2] & gg2 >= y[1] & gg2 <= y[2], ]
 }
 if (MT == "test_1") {
 ## cells at upper-left corner
 V1 <- ff2(cgrid(x = a1, y = a2)) * sqrt(2) * DB
 ## cells at upper-middle corner
 V2 <- ff2(cgrid(x = a1, y = a3)) * DB
 ## cells at upper-right corner
 V3 <- ff2(cgrid(x = a1, y = a4)) * sqrt(2) * DB
 ## cells at left corner
 V4 <- ff2(cgrid(x = a5, y = a2)) * DB
 V5 <- 0 ## cell at middle corner
 ## cells at right corner
 V6 <- ff2(cgrid(x = a5, y = a4)) * DB
 ## cells at bottom-left corner
 V7 <- ff2(cgrid(x = a6, y = a2)) * sqrt(2) * DB 
 ## cells at bottom-middle corner
 V8 <- ff2(cgrid(x = a6, y = a3)) * DB
 ## cells at bottom-right corner
 V9 <- ff2(cgrid(x = a6, y = a4) ) * sqrt(2) * DB
 }
 ## Build the matrix of cell selection
 V <- c(V1, V2, V3, V4, V5, V6, V7, V8, V9)
 tot <- sum(1/V[-5], na.rm = TRUE)
 mat_V <- matrix((1/V)/tot, nrow = 3, ncol = 3, byrow = TRUE)
 mat_V[5] <- V5
 while ((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)) {
 ## Test if the new cell is the previous cell
 if (c3 == newc1 && c4 == newc2) {
 mat_V[choiceC[1], choiceC[2]] <- NaN
 ## print(mat_V)
 }
 ## Find the maximum value in the matrix
 choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
 ## If there are several maximum values
 if (nrow(choiceC) > 1) choiceC <- choiceC[sample.int(nrow(choiceC), 1L), ]
 ## Find the new cell relative to the current cell 
 newC <- c(x = c1 + (choiceC[1] - 2), y = c2 + (choiceC[2] - 2))
 newC <- matrix(newC, ncol = 2)
 newc1 <- newC[[1]]
 newc2 <- newC[[2]]
 }
 return(newC)
} 
answered Nov 26, 2018 at 10:34
\$\endgroup\$
1
  • \$\begingroup\$ Thank you very much ! The function runs for 99.16 s vs 110 s from my code. \$\endgroup\$ Commented Nov 26, 2018 at 15:22

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.