Commit 3192a16

committed

Removed one block and one temporary variable allocation.

1 parent 5eb5273 commit 3192a16 (Copy full SHA for 3192a16)

File tree

2 files changed

+36

-42

lines changed

src
- stdlib_linalg.fypp
- stdlib_linalg_matrix_functions.fypp

2 files changed

+36

-42

lines changed

`‎src/stdlib_linalg.fypp‎`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -1693,7 +1693,7 @@ module stdlib_linalg`
`1693`	`1693`	!! `real` and `complex`.
`1694`	`1694`	`!!`
`1695`	`1695`	`!! By default, the order of the Pade approximation is set to 10. It can be changed`
`1696`		- !! via the `order` argument which must be non-negative.
	`1696`	+ !! via the `order` argument that must be non-negative.
`1697`	`1697`	`!!`
`1698`	`1698`	`!! If the input matrix is non-square or the order of the Pade approximation is`
`1699`	`1699`	`!! negative, the function returns an error state.`
`@@ -1738,7 +1738,7 @@ module stdlib_linalg`
`1738`	`1738`	!! `real` and `complex`.
`1739`	`1739`	`!!`
`1740`	`1740`	`!! By default, the order of the Pade approximation is set to 10. It can be changed`
`1741`		- !! via the `order` argument which must be non-negative.
	`1741`	+ !! via the `order` argument that must be non-negative.
`1742`	`1742`	`!!`
`1743`	`1743`	`!! If the input matrix is non-square or the order of the Pade approximation is`
`1744`	`1744`	`!! negative, the function returns an error state.`

`‎src/stdlib_linalg_matrix_functions.fypp‎`

Lines changed: 34 additions & 40 deletions

Original file line number	Diff line number	Diff line change
`@@ -30,13 +30,13 @@ contains`
`30`	`30`	`module subroutine stdlib_linalg_${ri}$_expm(A, E, order, err)`
`31`	`31`	`!> Input matrix A(n, n).`
`32`	`32`	`${rt}$, intent(in) :: A(:, :)`
`33`		`- !> [optional] Order of the Pade approximation.`
	`33`	`+ !> Exponential of the input matrix E = exp(A).`
	`34`	`+ ${rt}$, intent(out) :: E(:, :)`
	`35`	`+ !> [optional] Order of the Pade approximation.`
`34`	`36`	`integer(ilp), optional, intent(in) :: order`
`35`	`37`	`!> [optional] State return flag.`
`36`	`38`	`type(linalg_state_type), optional, intent(out) :: err`
`37`		`- !> Exponential of the input matrix E = exp(A).`
`38`		`- ${rt}$, intent(out) :: E(:, :)`
`39`		`-`
	`39`	`+`
`40`	`40`	`type(linalg_state_type) :: err0`
`41`	`41`	`integer(ilp) :: lda, n, lde, ne`
`42`	`42`
`@@ -68,7 +68,7 @@ contains`
`68`	`68`	`type(linalg_state_type), optional, intent(out) :: err`
`69`	`69`
`70`	`70`	`! Internal variables.`
`71`		`- ${rt}$, allocatable :: A2(:, :), Q(:, :), X(:, :)`
	`71`	`+ ${rt}$, allocatable :: A2(:, :), Q(:, :), X(:, :), X_tmp(:, :)`
`72`	`72`	`real(${rk}$) :: a_norm, c`
`73`	`73`	`integer(ilp) :: m, n, ee, k, s, order_, i, j`
`74`	`74`	`logical(lk) :: p`
`@@ -105,32 +105,29 @@ contains`
`105`	`105`	`enddo`
`106`	`106`
`107`	`107`	`! Iteratively compute the Pade approximation.`
`108`		`- block`
`109`		`- ${rt}$, allocatable :: X_tmp(:, :)`
`110`		`- p = .true.`
`111`		`- do k = 2, order_`
`112`		`- c = c * (order_ - k + 1) / (k * (2*order_ - k + 1))`
`113`		`- X_tmp = X`
`114`		`- #:if rt.startswith('complex')`
`115`		`- call gemm("N", "N", n, n, n, one_c${rk}$, A2, n, X_tmp, n, zero_c${rk}$, X, n)`
`116`		`- #:else`
`117`		`- call gemm("N", "N", n, n, n, one_${rk}$, A2, n, X_tmp, n, zero_${rk}$, X, n)`
`118`		`- #:endif`
	`108`	`+ p = .true.`
	`109`	`+ do k = 2, order_`
	`110`	`+ c = c * (order_ - k + 1) / (k * (2*order_ - k + 1))`
	`111`	`+ X_tmp = X`
	`112`	`+ #:if rt.startswith('complex')`
	`113`	`+ call gemm("N", "N", n, n, n, one_c${rk}$, A2, n, X_tmp, n, zero_c${rk}$, X, n)`
	`114`	`+ #:else`
	`115`	`+ call gemm("N", "N", n, n, n, one_${rk}$, A2, n, X_tmp, n, zero_${rk}$, X, n)`
	`116`	`+ #:endif`
	`117`	`+ do concurrent(i=1:n, j=1:n)`
	`118`	`+ A(i, j) = A(i, j) + c*X(i, j) ! E = E + c*X`
	`119`	`+ enddo`
	`120`	`+ if (p) then`
`119`	`121`	`do concurrent(i=1:n, j=1:n)`
`120`		`- A(i, j) = A(i, j) + c*X(i, j) ! E = E + c*X`
	`122`	`+ Q(i, j) = Q(i, j) + c*X(i, j) ! Q = Q + c*X`
`121`	`123`	`enddo`
`122`		`- if (p) then`
`123`		`- do concurrent(i=1:n, j=1:n)`
`124`		`- Q(i, j) = Q(i, j) + c*X(i, j) ! Q = Q + c*X`
`125`		`- enddo`
`126`		`- else`
`127`		`- do concurrent(i=1:n, j=1:n)`
`128`		`- Q(i, j) = Q(i, j) - c*X(i, j) ! Q = Q - c*X`
`129`		`- enddo`
`130`		`- endif`
`131`		`- p = .not. p`
`132`		`- enddo`
`133`		`- end block`
	`124`	`+ else`
	`125`	`+ do concurrent(i=1:n, j=1:n)`
	`126`	`+ Q(i, j) = Q(i, j) - c*X(i, j) ! Q = Q - c*X`
	`127`	`+ enddo`
	`128`	`+ endif`
	`129`	`+ p = .not. p`
	`130`	`+ enddo`
`134`	`131`
`135`	`132`	`block`
`136`	`133`	`integer(ilp) :: ipiv(n), info`
`@@ -139,17 +136,14 @@ contains`
`139`	`136`	`end block`
`140`	`137`
`141`	`138`	`! Matrix squaring.`
`142`		`- block`
`143`		`- ${rt}$, allocatable :: E_tmp(:, :)`
`144`		`- do k = 1, s`
`145`		`- E_tmp = A`
`146`		`- #:if rt.startswith('complex')`
`147`		`- call gemm("N", "N", n, n, n, one_c${rk}$, E_tmp, n, E_tmp, n, zero_c${rk}$, A, n)`
`148`		`- #:else`
`149`		`- call gemm("N", "N", n, n, n, one_${rk}$, E_tmp, n, E_tmp, n, zero_${rk}$, A, n)`
`150`		`- #:endif`
`151`		`- enddo`
`152`		`- end block`
	`139`	`+ do k = 1, s`
	`140`	`+ X = A ! Re-use X to minimize allocations.`
	`141`	`+ #:if rt.startswith('complex')`
	`142`	`+ call gemm("N", "N", n, n, n, one_c${rk}$, X, n, X, n, zero_c${rk}$, A, n)`
	`143`	`+ #:else`
	`144`	`+ call gemm("N", "N", n, n, n, one_${rk}$, X, n, X, n, zero_${rk}$, A, n)`
	`145`	`+ #:endif`
	`146`	`+ enddo`
`153`	`147`	`endif`
`154`	`148`
`155`	`149`	`call linalg_error_handling(err0, err)`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 3192a16

File tree

2 files changed

2 files changed

`‎src/stdlib_linalg.fypp‎`

`‎src/stdlib_linalg_matrix_functions.fypp‎`

0 commit comments