Applying a function to each row of a matrix

Question 1

My goal is to apply a function func1 to each row of the matrix input and then return a new one resulting from the transformation.

The code works but when the data frame contains more than 1 million rows, it become extremely slow. How can I optimize my code? I start learning programming and I am not familiar with strategies to speed up R code.

The functions performs 2 main steps:

Find the locations of all neighboring cells that are located in the extent PR from a focal cell, extract raster's values at these locations and calculate probability matrix
Find the maximum value in the matrix and the new cell corresponding with the maximum value.

Here's the data frame and raster:

library(dplyr)
library(raster)
library(psych)
 set.seed(1234) 
 n = 10000
 input <- as.matrix(data.frame(c1 = sample(1:10, n, replace = T), c2 = sample(1:10, n, replace = T), c3 = sample(1:10, n, replace = T), c4 = sample(1:10, n, replace = T)))
 r <- raster(extent(0, 10, 0, 10), res = 1)
 values(r) <- sample(1:1000, size = 10*10, replace = T)
 ## plot(r)

Here's my code to apply the function to each row in the matrix:

system.time(
 test <- input %>% 
 split(1:nrow(input)) %>% 
 map(~ func1(.x, 2, 2, "test_1")) %>% 
 do.call("rbind", .))

Here's the function:

func1 <- function(dataC, PR, DB, MT){
 ## Retrieve the coordinates x and y of the current cell
 c1 <- dataC[[1]]
 c2 <- dataC[[2]]
 ## Retrieve the coordinates x and y of the previous cell
 c3 <- dataC[[3]]
 c4 <- dataC[[4]]
 ## Initialize the coordinates x and y of the new cell
 newc1 <- -999
 newc2 <- -999
 if(MT=="test_1"){
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
 V1 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
 V2 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
 V3 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
 V4 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 V5 <- 0 ## cell at middle corner
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
 V6 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
 V7 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
 V8 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
 V9 <- mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 } else if(MT=="test_2"){
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at upper-left corner
 V1 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 - 1) : (c2 + 1))) ## cells at upper-middle corner
 V2 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - PR) : (c1 - 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at upper-right corner
 V3 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 - PR) : (c2 - 1))) ## cells at left corner
 V4 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 V5 <- 0 ## cells at middle corner
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 - 1) : (c1 + 1)), y = c((c2 + 1) : (c2 + PR))) ## cells at right corner
 V6 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - PR) : (c2 - 1))) ## cells at bottom-left corner
 V7 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB 
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 - 1) : (c2 + 1))) ## cells at bottom-middle corner
 V8 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * DB
 ## Extract the raster values with coordinates in matC
 matC <- expand.grid(x = c((c1 + 1) : (c1 + PR)), y = c((c2 + 1) : (c2 + PR))) ## cells at bottom-right corner
 V9 <- harmonic.mean(raster::extract(r, cbind(matC[,1], matC[,2])), na.rm = T) * sqrt(2) * DB
 }
 ## Build the matrix of cell selection
 tot <- sum(c(1/V1, 1/V2, 1/V3, 1/V4, 1/V6, 1/V7, 1/V8, 1/V9), na.rm = TRUE)
 mat_V <- matrix(data = c((1/V1)/tot, (1/V2)/tot, (1/V3)/tot, (1/V4)/tot, V5, 
 (1/V6)/tot, (1/V7)/tot, (1/V8)/tot, (1/V9)/tot), nrow = 3, ncol = 3, byrow = TRUE)
 while((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)){
 ## Test if the new cell is the previous cell
 if(c3 == newc1 && c4 == newc2){
 mat_V[choiceC[1], choiceC[2]] <- NaN
 ## print(mat_V)
 }
 ## Find the maximum value in the matrix
 choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
 ## print(choiceC)
 ## If there are several maximum values
 if(nrow(choiceC) > 1){
 choiceC <- choiceC[sample(1:nrow(choiceC), 1), ]
 }
 ## Find the new cell relative to the current cell 
 if(choiceC[1]==1 & choiceC[2]==1){ ## cell at the upper-left corner
 newC <- matrix(c(x = c1 - 1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==1 & choiceC[2]==2){ ## cell at the upper-middle corner
 newC <- matrix(c(x = c1 - 1, y = c2), ncol = 2)
 } else if(choiceC[1]==1 & choiceC[2]==3){ ## cell at the upper-right corner
 newC <- matrix(c(x = c1 - 1, y = c2 + 1), ncol = 2)
 } else if(choiceC[1]==2 & choiceC[2]==1){ ## cell at the left corner
 newC <- matrix(c(x = c1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==2 & choiceC[2]==3){ ## cell at the right corner
 newC <- matrix(c(x = c1, y = c2 + 1), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==1){ ## cell at the bottom-left corner
 newC <- matrix(c(x = c1 + 1, y = c2 - 1), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==2){ ## cell at the bottom-middle corner
 newC <- matrix(c(x = c1 + 1, y = c2), ncol = 2)
 } else if(choiceC[1]==3 & choiceC[2]==3){ ## cell at the bottom-right corner
 newC <- matrix(c(x = c1 + 1, y = c2 + 1), ncol = 2)
 }
 newc1 <- newC[[1]]
 newc2 <- newC[[2]]
 }
 return(newC)
 }

Here's the elapsed time when n = 10000. Ideally, I would like to reduce the time required at < 1 min.

user system elapsed 
108.96 0.01 109.81

Question 2

do the r need to be raster? can no t we use simple matrix?

Question 3

Yes, r can be a matrix.

Question 4

Did dome upgrades, but only for 'test_1' case, you can update 'test2' case similarly. For me this function run in 13.54 sek vs 26.16 sek for your original code.

func1 <- function(dataC, PR, DB, MT){
 ## Retrieve the coordinates x and y of the current cell
 c1 <- dataC[[1]]
 c2 <- dataC[[2]]
 ## Retrieve the coordinates x and y of the previous cell
 c3 <- dataC[[3]]
 c4 <- dataC[[4]]
 ## Initialize the coordinates x and y of the new cell
 newc1 <- -999
 newc2 <- -999
 a1 <- c((c1 - PR), (c1 - 1))
 a2 <- c((c2 - PR), (c2 - 1))
 a3 <- c((c2 - 1), (c2 + 1))
 a4 <- c((c2 + 1), (c2 + PR))
 a5 <- c((c1 - 1), (c1 + 1))
 a6 <- c((c1 + 1), (c1 + PR))
 xx <- c(a1, a2, a3, a4, a5, a6)
 xx <- seq(min(xx), max(xx))
 gg <- expand.grid(xx, xx, KEEP.OUT.ATTRS = F)
 gg <- as.matrix(gg)
 gg1 <- gg[, 1]
 gg2 <- gg[, 2]
 ff2 <- function(matC) {
 y1 <- raster::extract(r, matC)
 mean(y1, na.rm = T)
 }
 cgrid <- function(x, y) {
 gg[gg1 >= x[1] & gg1 <= x[2] & gg2 >= y[1] & gg2 <= y[2], ]
 }
 if (MT == "test_1") {
 ## cells at upper-left corner
 V1 <- ff2(cgrid(x = a1, y = a2)) * sqrt(2) * DB
 ## cells at upper-middle corner
 V2 <- ff2(cgrid(x = a1, y = a3)) * DB
 ## cells at upper-right corner
 V3 <- ff2(cgrid(x = a1, y = a4)) * sqrt(2) * DB
 ## cells at left corner
 V4 <- ff2(cgrid(x = a5, y = a2)) * DB
 V5 <- 0 ## cell at middle corner
 ## cells at right corner
 V6 <- ff2(cgrid(x = a5, y = a4)) * DB
 ## cells at bottom-left corner
 V7 <- ff2(cgrid(x = a6, y = a2)) * sqrt(2) * DB 
 ## cells at bottom-middle corner
 V8 <- ff2(cgrid(x = a6, y = a3)) * DB
 ## cells at bottom-right corner
 V9 <- ff2(cgrid(x = a6, y = a4) ) * sqrt(2) * DB
 }
 ## Build the matrix of cell selection
 V <- c(V1, V2, V3, V4, V5, V6, V7, V8, V9)
 tot <- sum(1/V[-5], na.rm = TRUE)
 mat_V <- matrix((1/V)/tot, nrow = 3, ncol = 3, byrow = TRUE)
 mat_V[5] <- V5
 while ((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)) {
 ## Test if the new cell is the previous cell
 if (c3 == newc1 && c4 == newc2) {
 mat_V[choiceC[1], choiceC[2]] <- NaN
 ## print(mat_V)
 }
 ## Find the maximum value in the matrix
 choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
 ## If there are several maximum values
 if (nrow(choiceC) > 1) choiceC <- choiceC[sample.int(nrow(choiceC), 1L), ]
 ## Find the new cell relative to the current cell 
 newC <- c(x = c1 + (choiceC[1] - 2), y = c2 + (choiceC[2] - 2))
 newC <- matrix(newC, ncol = 2)
 newc1 <- newC[[1]]
 newc2 <- newC[[2]]
 }
 return(newC)
}

Question 5

Thank you very much ! The function runs for 99.16 s vs 110 s from my code.

minem minem 1,0021 gold badge9 silver badges12 bronze badges · Accepted Answer · 2018-11-26 10:34:55Z

Did dome upgrades, but only for 'test_1' case, you can update 'test2' case similarly. For me this function run in 13.54 sek vs 26.16 sek for your original code.

func1 <- function(dataC, PR, DB, MT){
 ## Retrieve the coordinates x and y of the current cell
 c1 <- dataC[[1]]
 c2 <- dataC[[2]]
 ## Retrieve the coordinates x and y of the previous cell
 c3 <- dataC[[3]]
 c4 <- dataC[[4]]
 ## Initialize the coordinates x and y of the new cell
 newc1 <- -999
 newc2 <- -999
 a1 <- c((c1 - PR), (c1 - 1))
 a2 <- c((c2 - PR), (c2 - 1))
 a3 <- c((c2 - 1), (c2 + 1))
 a4 <- c((c2 + 1), (c2 + PR))
 a5 <- c((c1 - 1), (c1 + 1))
 a6 <- c((c1 + 1), (c1 + PR))
 xx <- c(a1, a2, a3, a4, a5, a6)
 xx <- seq(min(xx), max(xx))
 gg <- expand.grid(xx, xx, KEEP.OUT.ATTRS = F)
 gg <- as.matrix(gg)
 gg1 <- gg[, 1]
 gg2 <- gg[, 2]
 ff2 <- function(matC) {
 y1 <- raster::extract(r, matC)
 mean(y1, na.rm = T)
 }
 cgrid <- function(x, y) {
 gg[gg1 >= x[1] & gg1 <= x[2] & gg2 >= y[1] & gg2 <= y[2], ]
 }
 if (MT == "test_1") {
 ## cells at upper-left corner
 V1 <- ff2(cgrid(x = a1, y = a2)) * sqrt(2) * DB
 ## cells at upper-middle corner
 V2 <- ff2(cgrid(x = a1, y = a3)) * DB
 ## cells at upper-right corner
 V3 <- ff2(cgrid(x = a1, y = a4)) * sqrt(2) * DB
 ## cells at left corner
 V4 <- ff2(cgrid(x = a5, y = a2)) * DB
 V5 <- 0 ## cell at middle corner
 ## cells at right corner
 V6 <- ff2(cgrid(x = a5, y = a4)) * DB
 ## cells at bottom-left corner
 V7 <- ff2(cgrid(x = a6, y = a2)) * sqrt(2) * DB 
 ## cells at bottom-middle corner
 V8 <- ff2(cgrid(x = a6, y = a3)) * DB
 ## cells at bottom-right corner
 V9 <- ff2(cgrid(x = a6, y = a4) ) * sqrt(2) * DB
 }
 ## Build the matrix of cell selection
 V <- c(V1, V2, V3, V4, V5, V6, V7, V8, V9)
 tot <- sum(1/V[-5], na.rm = TRUE)
 mat_V <- matrix((1/V)/tot, nrow = 3, ncol = 3, byrow = TRUE)
 mat_V[5] <- V5
 while ((newc1 == -999 && newc2 == -999) || (c3 == newc1 && c4 == newc2)) {
 ## Test if the new cell is the previous cell
 if (c3 == newc1 && c4 == newc2) {
 mat_V[choiceC[1], choiceC[2]] <- NaN
 ## print(mat_V)
 }
 ## Find the maximum value in the matrix
 choiceC <- which(mat_V == max(mat_V, na.rm = TRUE), arr.ind = TRUE)
 ## If there are several maximum values
 if (nrow(choiceC) > 1) choiceC <- choiceC[sample.int(nrow(choiceC), 1L), ]
 ## Find the new cell relative to the current cell 
 newC <- c(x = c1 + (choiceC[1] - 2), y = c2 + (choiceC[2] - 2))
 newC <- matrix(newC, ncol = 2)
 newc1 <- newC[[1]]
 newc2 <- newC[[2]]
 }
 return(newC)
}

Thank you very much ! The function runs for 99.16 s vs 110 s from my code.

Stack Exchange Network

Applying a function to each row of a matrix

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Applying a function to each row of a matrix

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions